1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMLoadStoreOpt - Post-register allocation pass that combines
55 /// load / store instructions to form ldm / stm instructions.
// NOTE(review): the embedded original line numbers have gaps (56-57, 59, 61,
// 64-67, 69, 72-74, 76-77, 79, 82, 85, 95, 102, 106, 110), so several members
// are elided from this listing (e.g. the static pass ID, and the AFI, RS and
// isThumb2 members that method bodies below reference) -- confirm against the
// complete file before editing.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
// MemOpQueueEntry - one candidate memory op: its immediate offset, its
// ordinal position within the basic block, and an iterator to the
// instruction.  (The Offset/Position/Merged field declarations are elided in
// this listing; their existence follows from the constructor initializer
// list below.)
75 struct MemOpQueueEntry {
78 MachineBasicBlock::iterator MBBI;
80 MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
81 : Offset(o), Position(p), MBBI(i), Merged(false) {};
83 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
84 typedef MemOpQueue::iterator MemOpQueueIter;
86 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
87 int Offset, unsigned Base, bool BaseKill, int Opcode,
88 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
89 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
90 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
91 int Opcode, unsigned Size,
92 ARMCC::CondCodes Pred, unsigned PredReg,
93 unsigned Scratch, MemOpQueue &MemOps,
94 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
96 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
97 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
98 MachineBasicBlock::iterator &MBBI);
99 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
100 MachineBasicBlock::iterator MBBI,
101 const TargetInstrInfo *TII,
103 MachineBasicBlock::iterator &I);
104 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
105 MachineBasicBlock::iterator MBBI,
107 MachineBasicBlock::iterator &I);
108 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
109 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Out-of-line definition of the pass's static identification member.
111 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single load / store opcode to its
// load/store-multiple counterpart (used by MergeOps below).
// NOTE(review): the case labels (original lines 115-141) are elided from this
// listing; only the default is visible.
114 static int getLoadStoreMultipleOpcode(int Opcode) {
142 default: llvm_unreachable("Unhandled opcode!");
// isT2i32Load - True if Opc is a Thumb2 32-bit integer load.
147 static bool isT2i32Load(unsigned Opc) {
148 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// isi32Load - True if Opc is any 32-bit integer load (ARM LDR or Thumb2).
151 static bool isi32Load(unsigned Opc) {
152 return Opc == ARM::LDR || isT2i32Load(Opc);
// isT2i32Store - True if Opc is a Thumb2 32-bit integer store.
155 static bool isT2i32Store(unsigned Opc) {
156 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// isi32Store - True if Opc is any 32-bit integer store (ARM STR or Thumb2).
159 static bool isi32Store(unsigned Opc) {
160 return Opc == ARM::STR || isT2i32Store(Opc);
163 /// MergeOps - Create and insert a LDM or STM with Base as base register and
164 /// registers in Regs as the register operands that would be loaded / stored.
165 /// It returns true if the transformation is done.
// NOTE(review): several original lines are elided here (e.g. 166 return type,
// 175-177 the single-register early-out implied by the comment below, the
// bodies selecting Mode for the ib/da/db branches, and 204-218 parts of the
// new-base materialization) -- confirm against the complete file.
167 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
168 MachineBasicBlock::iterator MBBI,
169 int Offset, unsigned Base, bool BaseKill,
170 int Opcode, ARMCC::CondCodes Pred,
171 unsigned PredReg, unsigned Scratch, DebugLoc dl,
172 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
173 // Only a single register to load / store. Don't bother.
174 unsigned NumRegs = Regs.size();
// Default sub-mode is increment-after; the branches below pick ib / da / db
// based on the starting offset relative to the register count.
178 ARM_AM::AMSubMode Mode = ARM_AM::ia;
179 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
180 if (isAM4 && Offset == 4) {
182 // Thumb2 does not support ldmib / stmib.
185 } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
187 // Thumb2 does not support ldmda / stmda.
190 } else if (isAM4 && Offset == -4 * (int)NumRegs) {
192 } else if (Offset != 0) {
193 // If starting offset isn't zero, insert a MI to materialize a new base.
194 // But only do so if it is cost effective, i.e. merging more than two
200 if (isi32Load(Opcode))
201 // If it is a load, then just use one of the destination register to
202 // use as the new base.
203 NewBase = Regs[NumRegs-1].first;
205 // Use the scratch register to use as a new base.
210 int BaseOpc = !isThumb2
212 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
216 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
219 int ImmedOffset = isThumb2
220 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
221 if (ImmedOffset == -1)
222 // FIXME: Try t2ADDri12 or t2SUBri12?
223 return false; // Probably not worth it then.
// Materialize NewBase = Base +/- Offset so the multiple can start at 0.
225 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
226 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
227 .addImm(Pred).addReg(PredReg).addReg(0);
229 BaseKill = true; // New base is always killed right after its use.
232 bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
233 bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
234 Opcode = getLoadStoreMultipleOpcode(Opcode);
// AM4 (integer LDM/STM) and AM5 (VFP VLDM/VSTM) encode their mode immediate
// differently; AM5 additionally encodes the register count (doubled for DPR).
235 MachineInstrBuilder MIB = (isAM4)
236 ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
237 .addReg(Base, getKillRegState(BaseKill))
238 .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
239 : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
240 .addReg(Base, getKillRegState(BaseKill))
241 .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
242 .addImm(Pred).addReg(PredReg)
243 MIB.addReg(0); // Add optional writeback (0 for now).
244 for (unsigned i = 0; i != NumRegs; ++i)
245 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
246 | getKillRegState(Regs[i].second));
251 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
252 /// load / store multiple instructions.
// NOTE(review): elided lines (253, 268, 280-283, 291-297, 301-303, 311-316)
// include the return type, loop-control statements and closing braces --
// confirm against the complete file.
254 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
255 unsigned Base, int Opcode, unsigned Size,
256 ARMCC::CondCodes Pred, unsigned PredReg,
257 unsigned Scratch, MemOpQueue &MemOps,
258 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
259 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
260 int Offset = MemOps[SIndex].Offset;
261 int SOffset = Offset;
262 unsigned Pos = MemOps[SIndex].Position;
263 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
264 DebugLoc dl = Loc->getDebugLoc();
265 unsigned PReg = Loc->getOperand(0).getReg();
266 unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
267 bool isKill = Loc->getOperand(0).isKill();
// Seed the register list with the first op, then greedily extend the run.
269 SmallVector<std::pair<unsigned,bool>, 8> Regs;
270 Regs.push_back(std::make_pair(PReg, isKill));
271 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
272 int NewOffset = MemOps[i].Offset;
273 unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
274 unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
275 isKill = MemOps[i].MBBI->getOperand(0).isKill();
276 // AM4 - register numbers in ascending order.
277 // AM5 - consecutive register numbers in ascending order.
278 if (NewOffset == Offset + (int)Size &&
279 ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
281 Regs.push_back(std::make_pair(Reg, isKill));
284 // Can't merge this in. Try merge the earlier ones first.
285 if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
286 Scratch, dl, Regs)) {
287 Merges.push_back(prior(Loc));
288 for (unsigned j = SIndex; j < i; ++j) {
289 MBB.erase(MemOps[j].MBBI);
290 MemOps[j].Merged = true;
// Recurse to try merging the remainder of the queue starting at i.
293 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
// Track the latest position so the merged instruction is inserted after
// every op it replaces.
298 if (MemOps[i].Position > Pos) {
299 Pos = MemOps[i].Position;
300 Loc = MemOps[i].MBBI;
// Base is killed by the run if its last use (kill) is inside it.
304 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
305 if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
306 Scratch, dl, Regs)) {
307 Merges.push_back(prior(Loc));
308 for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
309 MBB.erase(MemOps[i].MBBI);
310 MemOps[i].Merged = true;
// isMatchingDecrement - True if MI is a SUB of Bytes from Base into Base,
// under the same predicate, with Bytes in (0, Limit) when Limit is nonzero.
// Used to fold a preceding/trailing decrement into a writeback form.
317 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
318 unsigned Bytes, unsigned Limit,
319 ARMCC::CondCodes Pred, unsigned PredReg){
320 unsigned MyPredReg = 0;
323 if (MI->getOpcode() != ARM::t2SUBri &&
324 MI->getOpcode() != ARM::t2SUBrSPi &&
325 MI->getOpcode() != ARM::t2SUBrSPi12 &&
326 MI->getOpcode() != ARM::tSUBspi &&
327 MI->getOpcode() != ARM::SUBri)
330 // Make sure the offset fits in 8 bits.
331 if (Bytes <= 0 || (Limit && Bytes >= Limit))
// tSUBspi's immediate is in words, so scale it to bytes before comparing.
334 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
335 return (MI->getOperand(0).getReg() == Base &&
336 MI->getOperand(1).getReg() == Base &&
337 (MI->getOperand(2).getImm()*Scale) == Bytes &&
338 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
339 MyPredReg == PredReg);
// isMatchingIncrement - Mirror of isMatchingDecrement for ADDs: true if MI
// adds Bytes to Base in place, under the same predicate, within Limit.
342 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
343 unsigned Bytes, unsigned Limit,
344 ARMCC::CondCodes Pred, unsigned PredReg){
345 unsigned MyPredReg = 0;
348 if (MI->getOpcode() != ARM::t2ADDri &&
349 MI->getOpcode() != ARM::t2ADDrSPi &&
350 MI->getOpcode() != ARM::t2ADDrSPi12 &&
351 MI->getOpcode() != ARM::tADDspi &&
352 MI->getOpcode() != ARM::ADDri)
355 if (Bytes <= 0 || (Limit && Bytes >= Limit))
356 // Make sure the offset fits in 8 bits.
// tADDspi's immediate is in words, so scale it to bytes before comparing.
359 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
360 return (MI->getOperand(0).getReg() == Base &&
361 MI->getOperand(1).getReg() == Base &&
362 (MI->getOperand(2).getImm()*Scale) == Bytes &&
363 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
364 MyPredReg == PredReg);
// getLSMultipleTransferSize - Number of bytes transferred by MI.
// NOTE(review): the case labels (original lines 369-385, 387-390) are elided
// from this listing; visibly, one group derives the size from the operand
// count of a multiple (4 bytes per register operand past the first 5) and
// another from the AM5 offset field.
367 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
368 switch (MI->getOpcode()) {
386 return (MI->getNumOperands() - 5) * 4;
391 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
395 /// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
396 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
398 /// stmia rn, <ra, rb, rc>
399 /// rn := rn + 4 * 3;
401 /// stmia rn!, <ra, rb, rc>
403 /// rn := rn - 4 * 3;
404 /// ldmia rn, <ra, rb, rc>
406 /// ldmdb rn!, <ra, rb, rc>
// NOTE(review): many lines are elided (e.g. 409 the Advance parameter that
// the callers pass, early returns after the WB-flag checks, the deletions of
// the matched add/sub instructions, and closing braces) -- confirm against
// the complete file.
407 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
408 MachineBasicBlock::iterator MBBI,
410 MachineBasicBlock::iterator &I) {
411 MachineInstr *MI = MBBI;
412 unsigned Base = MI->getOperand(0).getReg();
413 unsigned Bytes = getLSMultipleTransferSize(MI);
414 unsigned PredReg = 0;
415 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
416 int Opcode = MI->getOpcode();
417 bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
418 Opcode == ARM::STM || Opcode == ARM::t2STM;
// Already has writeback? Nothing to fold.
421 if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
424 // Can't use the updating AM4 sub-mode if the base register is also a dest
425 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
426 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
427 if (MI->getOperand(i).getReg() == Base)
431 ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
// Try folding a decrement that immediately precedes the multiple.
432 if (MBBI != MBB.begin()) {
433 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
434 if (Mode == ARM_AM::ia &&
435 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
436 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
437 MI->getOperand(4).setReg(Base);
438 MI->getOperand(4).setIsDef();
441 } else if (Mode == ARM_AM::ib &&
442 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
443 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
444 MI->getOperand(4).setReg(Base); // WB to base
445 MI->getOperand(4).setIsDef();
// Otherwise try folding an increment/decrement that immediately follows.
451 if (MBBI != MBB.end()) {
452 MachineBasicBlock::iterator NextMBBI = next(MBBI);
453 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
454 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
455 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
456 MI->getOperand(4).setReg(Base); // WB to base
457 MI->getOperand(4).setIsDef();
464 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
465 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
466 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
467 MI->getOperand(4).setReg(Base); // WB to base
468 MI->getOperand(4).setIsDef();
478 // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
479 if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
482 ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
483 unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
484 if (MBBI != MBB.begin()) {
485 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
486 if (Mode == ARM_AM::ia &&
487 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
488 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
489 MI->getOperand(4).setReg(Base); // WB to base
490 MI->getOperand(4).setIsDef();
496 if (MBBI != MBB.end()) {
497 MachineBasicBlock::iterator NextMBBI = next(MBBI);
498 if (Mode == ARM_AM::ia &&
499 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
500 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
501 MI->getOperand(4).setReg(Base); // WB to base
502 MI->getOperand(4).setIsDef();
// getPreIndexedLoadStoreOpcode - Map a load/store opcode to its pre-indexed
// (writeback-before) form; VFP ops map to their multiple form instead.
// NOTE(review): the "switch (Opc) {" line (517) and several case labels
// (524-525, 527-528) are elided from this listing.
516 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
518 case ARM::LDR: return ARM::LDR_PRE;
519 case ARM::STR: return ARM::STR_PRE;
520 case ARM::VLDRS: return ARM::VLDMS;
521 case ARM::VLDRD: return ARM::VLDMD;
522 case ARM::VSTRS: return ARM::VSTMS;
523 case ARM::VSTRD: return ARM::VSTMD;
526 return ARM::t2LDR_PRE;
529 return ARM::t2STR_PRE;
530 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a load/store opcode to its post-indexed
// (writeback-after) form; VFP ops map to their multiple form instead.
// NOTE(review): the "switch (Opc) {" line (536) and several case labels
// (543-544, 546-547) are elided from this listing.
535 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
537 case ARM::LDR: return ARM::LDR_POST;
538 case ARM::STR: return ARM::STR_POST;
539 case ARM::VLDRS: return ARM::VLDMS;
540 case ARM::VLDRD: return ARM::VLDMD;
541 case ARM::VSTRS: return ARM::VSTMS;
542 case ARM::VSTRD: return ARM::VSTMD;
545 return ARM::t2LDR_POST;
548 return ARM::t2STR_POST;
549 default: llvm_unreachable("Unhandled opcode!");
554 /// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
555 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
// NOTE(review): elided lines include parameter 559 (the Advance flag callers
// pass), the early returns after each offset check, the deletion of the
// matched add/sub and of the original memory op, and closing braces --
// confirm against the complete file.
556 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
557 MachineBasicBlock::iterator MBBI,
558 const TargetInstrInfo *TII,
560 MachineBasicBlock::iterator &I) {
561 MachineInstr *MI = MBBI;
562 unsigned Base = MI->getOperand(1).getReg();
563 bool BaseKill = MI->getOperand(1).isKill();
564 unsigned Bytes = getLSMultipleTransferSize(MI);
565 int Opcode = MI->getOpcode();
566 DebugLoc dl = MI->getDebugLoc();
567 bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
568 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
569 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
// The op must currently have a zero immediate offset to be foldable.
570 if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
572 else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
574 else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
575 if (MI->getOperand(2).getImm() != 0)
578 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
579 // Can't do the merge if the destination register is the same as the would-be
580 // writeback register.
581 if (isLd && MI->getOperand(0).getReg() == Base)
584 unsigned PredReg = 0;
585 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
586 bool DoMerge = false;
587 ARM_AM::AddrOpc AddSub = ARM_AM::add;
589 // AM2 - 12 bits, thumb2 - 8 bits.
590 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
// Prefer a preceding add/sub (becomes a pre-indexed op) ...
591 if (MBBI != MBB.begin()) {
592 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
593 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
595 AddSub = ARM_AM::sub;
596 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
598 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
600 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
// ... otherwise a trailing add/sub (becomes a post-indexed op).
606 if (!DoMerge && MBBI != MBB.end()) {
607 MachineBasicBlock::iterator NextMBBI = next(MBBI);
609 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
611 AddSub = ARM_AM::sub;
612 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
613 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
615 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
// Encode the folded offset in the addressing-mode-specific immediate.
629 bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
632 Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
634 : ARM_AM::ia, true, (isDPR ? 2 : 1));
636 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
638 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// Loads: VLDM with writeback, or LDR_PRE/POST / t2LDR_PRE/POST.
642 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
643 .addReg(Base, getKillRegState(BaseKill))
644 .addImm(Offset).addImm(Pred).addReg(PredReg)
645 .addReg(Base, getDefRegState(true)) // WB base register
646 .addReg(MI->getOperand(0).getReg(), RegState::Define);
648 // LDR_PRE, LDR_POST,
649 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
650 .addReg(Base, RegState::Define)
651 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
653 // t2LDR_PRE, t2LDR_POST
654 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
655 .addReg(Base, RegState::Define)
656 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Stores: VSTM with writeback, or STR_PRE/POST / t2STR_PRE/POST.
658 MachineOperand &MO = MI->getOperand(0);
661 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
662 .addImm(Pred).addReg(PredReg)
663 .addReg(Base, getDefRegState(true)) // WB base register
664 .addReg(MO.getReg(), getKillRegState(MO.isKill()));
667 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
668 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
669 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
671 // t2STR_PRE, t2STR_POST
672 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
673 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
674 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
681 /// isMemoryOp - Returns true if the instruction is a memory operation (that
682 /// this pass is capable of operating on).
// NOTE(review): the case labels (685-688, 690-691, 693-694, 696-699) are
// elided from this listing; visibly, one group additionally requires the
// offset-register operand to be absent (reg 0).
683 static bool isMemoryOp(const MachineInstr *MI) {
684 int Opcode = MI->getOpcode();
689 return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
692 return MI->getOperand(1).isReg();
695 return MI->getOperand(1).isReg();
700 return MI->getOperand(1).isReg();
705 /// AdvanceRS - Advance register scavenger to just before the earliest memory
706 /// op that is being merged.
707 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the queue entry with the smallest Position (earliest in the block).
708 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
709 unsigned Position = MemOps[0].Position;
710 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
711 if (MemOps[i].Position < Position) {
712 Position = MemOps[i].Position;
713 Loc = MemOps[i].MBBI;
// Advance the scavenger to the instruction just before that op.
717 if (Loc != MBB.begin())
718 RS->forward(prior(Loc));
// getMemoryOpOffset - Decode MI's immediate offset (in bytes, signed) from
// its addressing-mode-specific offset field.
// NOTE(review): several lines are elided (727, 731-733, 737, 739-740,
// 742-743, 745-749), including the returns for the sub cases and the Thumb2
// path -- confirm against the complete file.
721 static int getMemoryOpOffset(const MachineInstr *MI) {
722 int Opcode = MI->getOpcode();
723 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
724 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
725 unsigned NumOperands = MI->getDesc().getNumOperands();
726 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// Thumb2 immediates are plain (already signed) values.
728 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
729 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
730 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
// ARM modes pack magnitude + add/sub direction; AM5 is in words (x4).
734 ? ARM_AM::getAM2Offset(OffField)
735 : (isAM3 ? ARM_AM::getAM3Offset(OffField)
736 : ARM_AM::getAM5Offset(OffField) * 4);
738 if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
741 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
744 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit a single LDR or STR (used when an invalid LDRD/STRD
// register pair is split into two word-sized ops).  OffImm is encoded into
// the AM2 offset field for ARM, or passed through for Thumb2 (lines 759-766
// partially elided).
750 static void InsertLDR_STR(MachineBasicBlock &MBB,
751 MachineBasicBlock::iterator &MBBI,
752 int OffImm, bool isDef,
753 DebugLoc dl, unsigned NewOpc,
754 unsigned Reg, bool RegDeadKill, bool RegUndef,
755 unsigned BaseReg, bool BaseKill, bool BaseUndef,
756 unsigned OffReg, bool OffKill, bool OffUndef,
757 ARMCC::CondCodes Pred, unsigned PredReg,
758 const TargetInstrInfo *TII, bool isT2) {
762 Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
764 Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
// Load: Reg is a def; store (below): Reg is a use.
767 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
769 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
770 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
772 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
773 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
775 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
777 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
778 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
780 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
781 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - LDRD/STRD require an (even, even+1) register pair.
// If this op's pair is invalid, rewrite it as an LDM/STM (ascending regs,
// zero offset) or as two single LDR/STRs, then erase the original.
// NOTE(review): elided lines include the early return for valid pairs, the
// erase/advance epilogue, and several closing braces -- confirm against the
// complete file.
785 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
786 MachineBasicBlock::iterator &MBBI) {
787 MachineInstr *MI = &*MBBI;
788 unsigned Opcode = MI->getOpcode();
789 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
790 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
791 unsigned EvenReg = MI->getOperand(0).getReg();
792 unsigned OddReg = MI->getOperand(1).getReg();
793 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
794 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// Valid pair: even register followed by the next odd register.
795 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
798 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
799 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
800 bool EvenDeadKill = isLd ?
801 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
802 bool EvenUndef = MI->getOperand(0).isUndef();
803 bool OddDeadKill = isLd ?
804 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
805 bool OddUndef = MI->getOperand(1).isUndef();
806 const MachineOperand &BaseOp = MI->getOperand(2);
807 unsigned BaseReg = BaseOp.getReg();
808 bool BaseKill = BaseOp.isKill();
809 bool BaseUndef = BaseOp.isUndef();
810 unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
811 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
812 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
813 int OffImm = getMemoryOpOffset(MI);
814 unsigned PredReg = 0;
815 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
817 if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
818 // Ascending register numbers and no offset. It's safe to change it to a
820 unsigned NewOpc = (isLd)
821 ? (isT2 ? ARM::t2LDM : ARM::LDM)
822 : (isT2 ? ARM::t2STM : ARM::STM);
824 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
825 .addReg(BaseReg, getKillRegState(BaseKill))
826 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
827 .addImm(Pred).addReg(PredReg)
829 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
830 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store variant of the same LDM/STM rewrite.
833 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
834 .addReg(BaseReg, getKillRegState(BaseKill))
835 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
836 .addImm(Pred).addReg(PredReg)
839 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
841 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
845 // Split into two instructions.
846 assert((!isT2 || !OffReg) &&
847 "Thumb2 ldrd / strd does not encode offset register!");
848 unsigned NewOpc = (isLd)
849 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
850 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR)
851 DebugLoc dl = MBBI->getDebugLoc();
852 // If this is a load and base register is killed, it may have been
853 // re-defed by the load, make sure the first load does not clobber it.
855 (BaseKill || OffKill) &&
856 (TRI->regsOverlap(EvenReg, BaseReg) ||
857 (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
858 assert(!TRI->regsOverlap(OddReg, BaseReg) &&
859 (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
// Emit the odd half first so EvenReg's redefinition can't clobber the
// base/offset registers used by the second (even) half.
860 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
861 OddReg, OddDeadKill, false,
862 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
863 Pred, PredReg, TII, isT2);
864 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
865 EvenReg, EvenDeadKill, false,
866 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
867 Pred, PredReg, TII, isT2);
// Normal order: even half first, then odd half at offset +4.
869 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
870 EvenReg, EvenDeadKill, EvenUndef,
871 BaseReg, false, BaseUndef, OffReg, false, OffUndef,
872 Pred, PredReg, TII, isT2);
873 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
874 OddReg, OddDeadKill, OddUndef,
875 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
876 Pred, PredReg, TII, isT2);
890 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
891 /// ops of the same base and incrementing offset into LDM / STM ops.
892 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// NOTE(review): many lines are elided in this listing (the main while loop
// header, the CurrOpc/CurrSize assignments, queue-reset statements, iterator
// advances, and closing braces) -- confirm against the complete file.
893 unsigned NumMerges = 0;
894 unsigned NumMemOps = 0;
896 unsigned CurrBase = 0;
898 unsigned CurrSize = 0;
899 ARMCC::CondCodes CurrPred = ARMCC::AL;
900 unsigned CurrPredReg = 0;
901 unsigned Position = 0;
902 SmallVector<MachineBasicBlock::iterator,4> Merges;
904 RS->enterBasicBlock(&MBB);
905 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// First give invalid LDRD/STRD register pairs a chance to be rewritten.
907 if (FixInvalidRegPairOp(MBB, MBBI))
910 bool Advance = false;
911 bool TryMerge = false;
912 bool Clobber = false;
914 bool isMemOp = isMemoryOp(MBBI);
916 int Opcode = MBBI->getOpcode();
917 unsigned Size = getLSMultipleTransferSize(MBBI);
918 unsigned Base = MBBI->getOperand(1).getReg();
919 unsigned PredReg = 0;
920 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
921 int Offset = getMemoryOpOffset(MBBI);
924 // r5 := ldr [r5, #4]
925 // r6 := ldr [r5, #8]
927 // The second ldr has effectively broken the chain even though it
928 // looks like the later ldr(s) use the same base register. Try to
929 // merge the ldr's so far, including this one. But don't try to
930 // combine the following ldr(s).
931 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
932 if (CurrBase == 0 && !Clobber) {
933 // Start of a new chain.
938 CurrPredReg = PredReg;
939 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
// Same opcode/base/predicate: extend the current chain, keeping the
// queue sorted by offset and rejecting duplicate offsets.
948 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
949 // No need to match PredReg.
950 // Continue adding to the queue.
951 if (Offset > MemOps.back().Offset) {
952 MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
956 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
958 if (Offset < I->Offset) {
959 MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
963 } else if (Offset == I->Offset) {
964 // Collision! This can't be merged!
977 // Reach the end of the block, try merging the memory instructions.
984 // Try to find a free register to use as a new base in case it's needed.
985 // First advance to the instruction just before the start of the chain.
986 AdvanceRS(MBB, MemOps);
987 // Find a scratch register.
988 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
989 // Process the load / store instructions.
990 RS->forward(prior(MBBI));
994 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
995 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
997 // Try folding preceding/trailing base inc/dec into the generated
999 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1000 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1002 NumMerges += Merges.size();
1004 // Try folding preceding/trailing base inc/dec into those load/store
1005 // that were not merged to form LDM/STM ops.
1006 for (unsigned i = 0; i != NumMemOps; ++i)
1007 if (!MemOps[i].Merged)
1008 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1011 // RS may be pointing to an instruction that's deleted.
1012 RS->skipTo(prior(MBBI));
1013 } else if (NumMemOps == 1) {
1014 // Try folding preceding/trailing base inc/dec into the single
1016 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1018 RS->forward(prior(MBBI));
// Reset chain state for the next run of memory ops.
1025 CurrPred = ARMCC::AL;
1032 // If iterator hasn't been advanced and this is not a memory op, skip it.
1033 // It can't start a new chain anyway.
1034 if (!Advance && !isMemOp && MBBI != E) {
1040 return NumMerges > 0;
// OffsetCompare - Strict-weak ordering of memory ops by DESCENDING immediate
// offset; asserts that distinct instructions never share an offset.
1044 struct OffsetCompare {
1045 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1046 int LOffset = getMemoryOpOffset(LHS);
1047 int ROffset = getMemoryOpOffset(RHS);
1048 assert(LHS == RHS || LOffset != ROffset);
1049 return LOffset > ROffset;
1054 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
1055 /// (bx lr) into the preceding stack restore so it directly restores the value
1057 /// ldmfd sp!, {r7, lr}
1060 /// ldmfd sp!, {r7, pc}
1061 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1062 if (MBB.empty()) return false;
1064 MachineBasicBlock::iterator MBBI = prior(MBB.end());
1065 if (MBBI != MBB.begin() &&
1066 (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
1067 MachineInstr *PrevMI = prior(MBBI);
1068 if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
// Only fold when the last restored register is LR (elided lines
// 1070-1081 presumably rewrite that operand to PC and erase the return
// -- TODO confirm against the complete file).
1069 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1070 if (MO.getReg() != ARM::LR)
1072 unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
1073 PrevMI->setDesc(TII->get(NewOpc));
// runOnMachineFunction - Entry point: run both per-block optimizations over
// every basic block.  (The deletion of RS and final return, lines 1096-1100,
// are elided from this listing.)
1082 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1083 const TargetMachine &TM = Fn.getTarget();
1084 AFI = Fn.getInfo<ARMFunctionInfo>();
1085 TII = TM.getInstrInfo();
1086 TRI = TM.getRegisterInfo();
1087 RS = new RegScavenger();
1088 isThumb2 = AFI->isThumb2Function();
1090 bool Modified = false;
1091 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1093 MachineBasicBlock &MBB = *MFI;
1094 Modified |= LoadStoreMultipleOpti(MBB);
1095 Modified |= MergeReturnIntoLDM(MBB);
1103 /// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
1104 /// load / stores from consecutive locations close to make it more
1105 /// likely they will be combined later.
// NOTE(review): elided lines include the static ID declaration, access
// specifiers and the closing brace of the struct -- confirm against the
// complete file.
1108 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1110 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
1112 const TargetData *TD;
1113 const TargetInstrInfo *TII;
1114 const TargetRegisterInfo *TRI;
1115 const ARMSubtarget *STI;
1116 MachineRegisterInfo *MRI;
1117 MachineFunction *MF;
1119 virtual bool runOnMachineFunction(MachineFunction &Fn);
1121 virtual const char *getPassName() const {
1122 return "ARM pre- register allocation load / store optimization pass";
1126 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1127 unsigned &NewOpc, unsigned &EvenReg,
1128 unsigned &OddReg, unsigned &BaseReg,
1129 unsigned &OffReg, int &Offset,
1130 unsigned &PredReg, ARMCC::CondCodes &Pred,
1132 bool RescheduleOps(MachineBasicBlock *MBB,
1133 SmallVector<MachineInstr*, 4> &Ops,
1134 unsigned Base, bool isLd,
1135 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1136 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Out-of-line definition of the pass's static identification member.
1138 char ARMPreAllocLoadStoreOpt::ID = 0;
// Entry point of the pre-register-allocation pass: cache target hooks,
// then reschedule loads/stores block by block.
1141 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1142 TD = Fn.getTarget().getTargetData();
1143 TII = Fn.getTarget().getInstrInfo();
1144 TRI = Fn.getTarget().getRegisterInfo();
1145 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1146 MRI = &Fn.getRegInfo();
1149 bool Modified = false;
1150 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1152 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Decide whether the memory ops in MemOps may
// be moved next to each other without changing behavior, and whether the
// implied register-pressure increase is acceptable.
//   isLd    - true when the ops being moved are loads.
//   Base    - the common base register of the ops.
//   [I, E)  - the instruction range spanned by the ops.
//   MemRegs - data registers already used by the ops being moved.
1157 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1158 MachineBasicBlock::iterator I,
1159 MachineBasicBlock::iterator E,
1160 SmallPtrSet<MachineInstr*, 4> &MemOps,
1161 SmallSet<unsigned, 4> &MemRegs,
1162 const TargetRegisterInfo *TRI) {
1163 // Are there stores / loads / calls between them?
1164 // FIXME: This is overly conservative. We should make use of alias information
// Registers whose live ranges would be stretched by the move.
1166 SmallSet<unsigned, 4> AddedRegPressure;
1168 if (MemOps.count(&*I))
// Calls, terminators and instructions with unmodeled side effects act as
// barriers that the ops must not be moved across.
1170 const TargetInstrDesc &TID = I->getDesc();
1171 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
// Without alias info, moving a load across an intervening store is unsafe.
1173 if (isLd && TID.mayStore())
1178 // It's not safe to move the first 'str' down.
1181 // str r4, [r0, #+4]
// Scan operands: a def overlapping the base register blocks the move; any
// other register referenced in between adds to register pressure.
1185 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1186 MachineOperand &MO = I->getOperand(j);
1189 unsigned Reg = MO.getReg();
1190 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1192 if (Reg != Base && !MemRegs.count(Reg))
1193 AddedRegPressure.insert(Reg);
1197 // Estimate register pressure increase due to the transformation.
1198 if (MemRegs.size() <= 4)
1199 // Ok if we are moving small number of instructions.
1201 return AddedRegPressure.size() <= MemRegs.size() * 2;
// CanFormLdStDWord - Determine whether Op0/Op1 (two word loads or two word
// stores off the same base) can be merged into one LDRD/STRD. On success,
// fills in the new opcode, the even/odd data registers, base and offset
// registers, encoded offset, predicate, and debug location.
1205 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1207 unsigned &NewOpc, unsigned &EvenReg,
1208 unsigned &OddReg, unsigned &BaseReg,
1209 unsigned &OffReg, int &Offset,
1211 ARMCC::CondCodes &Pred,
1213 // Make sure we're allowed to generate LDRD/STRD.
1214 if (!STI->hasV5TEOps())
1217 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the single-word opcode to its doubleword counterpart.
1219 unsigned Opcode = Op0->getOpcode();
1220 if (Opcode == ARM::LDR)
1222 else if (Opcode == ARM::STR)
1224 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1225 NewOpc = ARM::t2LDRDi8;
1228 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1229 NewOpc = ARM::t2STRDi8;
1235 // Make sure the offset registers match.
1237 (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
1240 // Make sure the base address satisfies i64 ld / st alignment requirement.
// Require a single, non-volatile memory operand with a known value so the
// alignment of the access can be trusted.
1241 if (!Op0->hasOneMemOperand() ||
1242 !(*Op0->memoperands_begin())->getValue() ||
1243 (*Op0->memoperands_begin())->isVolatile())
1246 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1247 Function *Func = MF->getFunction();
1248 unsigned ReqAlign = STI->hasV6Ops()
1249 ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
1250 : 8; // Pre-v6 need 8-byte align
1251 if (Align < ReqAlign)
1254 // Then make sure the immediate offset fits.
1255 int OffImm = getMemoryOpOffset(Op0);
1259 // Can't fall back to t2LDRi8 / t2STRi8.
1262 int Limit = (1 << 8) * Scale;
1263 if (OffImm >= Limit || (OffImm & (Scale-1)))
// ARM addressing mode 3: the add/sub direction is encoded with the offset.
1268 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1270 AddSub = ARM_AM::sub;
1273 int Limit = (1 << 8) * Scale;
1274 if (OffImm >= Limit || (OffImm & (Scale-1)))
1276 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// The two data registers must be distinct to form a doubleword pair.
1278 EvenReg = Op0->getOperand(0).getReg();
1279 OddReg = Op1->getOperand(0).getReg();
1280 if (EvenReg == OddReg)
1282 BaseReg = Op0->getOperand(1).getReg();
1284 OffReg = Op0->getOperand(2).getReg();
1285 Pred = llvm::getInstrPredicate(Op0, PredReg);
1286 dl = Op0->getDebugLoc();
// RescheduleOps - Given the loads or stores (Ops) off a common base
// register, move runs with consecutive offsets next to each other; when
// exactly two ops are moved, try to emit a single LDRD/STRD instead.
// Returns true if anything changed.
1290 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1291 SmallVector<MachineInstr*, 4> &Ops,
1292 unsigned Base, bool isLd,
1293 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1294 bool RetVal = false;
1296 // Sort by offset (in reverse order).
1297 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1299 // The loads / stores of the same base are in order. Scan them from first to
1300 // last and check for the following:
1301 // 1. Any def of base.
1303 while (Ops.size() > 1) {
1304 unsigned FirstLoc = ~0U;
1305 unsigned LastLoc = 0;
1306 MachineInstr *FirstOp = 0;
1307 MachineInstr *LastOp = 0;
1309 unsigned LastOpcode = 0;
1310 unsigned LastBytes = 0;
1311 unsigned NumMove = 0;
// Walk ops from the back of the sorted vector, growing a run that keeps
// the same opcode and strictly consecutive offsets, and tracking the
// earliest/latest program locations (FirstLoc/LastLoc) in the run.
1312 for (int i = Ops.size() - 1; i >= 0; --i) {
1313 MachineInstr *Op = Ops[i];
1314 unsigned Loc = MI2LocMap[Op];
1315 if (Loc <= FirstLoc) {
1319 if (Loc >= LastLoc) {
1324 unsigned Opcode = Op->getOpcode();
1325 if (LastOpcode && Opcode != LastOpcode)
1328 int Offset = getMemoryOpOffset(Op);
1329 unsigned Bytes = getLSMultipleTransferSize(Op);
// End the run when the transfer size changes or the offset is not exactly
// contiguous with the previous op.
1331 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1334 LastOffset = Offset;
1336 LastOpcode = Opcode;
1337 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the run's instructions and data registers for the safety /
// profitability check below.
1344 SmallPtrSet<MachineInstr*, 4> MemOps;
1345 SmallSet<unsigned, 4> MemRegs;
1346 for (int i = NumMove-1; i >= 0; --i) {
1347 MemOps.insert(Ops[i]);
1348 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1351 // Be conservative, if the instructions are too far apart, don't
1352 // move them. We want to limit the increase of register pressure.
1353 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1355 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1356 MemOps, MemRegs, TRI);
1358 for (unsigned i = 0; i != NumMove; ++i)
1361 // This is the new location for the loads / stores.
// Loads gather at the earliest op, stores at the latest; skip over any of
// the ops themselves already sitting at the insertion point.
1362 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1363 while (InsertPos != MBB->end() && MemOps.count(InsertPos))
1366 // If we are moving a pair of loads / stores, see if it makes sense
1367 // to try to allocate a pair of registers that can form register pairs.
1368 MachineInstr *Op0 = Ops.back();
1369 MachineInstr *Op1 = Ops[Ops.size()-2];
1370 unsigned EvenReg = 0, OddReg = 0;
1371 unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
1372 ARMCC::CondCodes Pred = ARMCC::AL;
1374 unsigned NewOpc = 0;
1377 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1378 EvenReg, OddReg, BaseReg, OffReg,
1379 Offset, PredReg, Pred, isT2)) {
1383 // Form the pair instruction.
1385 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1386 dl, TII->get(NewOpc))
1387 .addReg(EvenReg, RegState::Define)
1388 .addReg(OddReg, RegState::Define)
1392 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1395 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1396 dl, TII->get(NewOpc))
1402 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1408 // Add register allocation hints to form register pairs.
1409 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1410 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// Otherwise just splice the moved ops to the insertion point in order.
1412 for (unsigned i = 0; i != NumMove; ++i) {
1413 MachineInstr *Op = Ops.back();
1415 MBB->splice(InsertPos, MBB, Op);
1419 NumLdStMoved += NumMove;
// RescheduleLoadStoreInstrs - One pass over the block: assign each
// instruction an ordinal location, bucket unpredicated loads and stores by
// their base register, then reschedule each bucket via RescheduleOps.
1429 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1430 bool RetVal = false;
// MI2LocMap: instruction -> ordinal, used to measure distances.
// Base2Lds/StsMap: base register -> memory ops using it; LdBases/StBases
// record the bases in first-seen order.
1432 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1433 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1434 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1435 SmallVector<unsigned, 4> LdBases;
1436 SmallVector<unsigned, 4> StBases;
1439 MachineBasicBlock::iterator MBBI = MBB->begin();
1440 MachineBasicBlock::iterator E = MBB->end();
1442 for (; MBBI != E; ++MBBI) {
1443 MachineInstr *MI = MBBI;
1444 const TargetInstrDesc &TID = MI->getDesc();
1445 if (TID.isCall() || TID.isTerminator()) {
1446 // Stop at barriers.
1451 MI2LocMap[MI] = Loc++;
1452 if (!isMemoryOp(MI))
// Only unpredicated (ARMCC::AL) memory ops are considered.
1454 unsigned PredReg = 0;
1455 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1458 int Opc = MI->getOpcode();
1459 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1460 unsigned Base = MI->getOperand(1).getReg();
1461 int Offset = getMemoryOpOffset(MI);
1463 bool StopHere = false;
// Bucket a load under its base; a repeated base+offset combination marks
// a duplicate access and stops the scan (StopHere).
1465 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1466 Base2LdsMap.find(Base);
1467 if (BI != Base2LdsMap.end()) {
1468 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1469 if (Offset == getMemoryOpOffset(BI->second[i])) {
1475 BI->second.push_back(MI);
1477 SmallVector<MachineInstr*, 4> MIs;
1479 Base2LdsMap[Base] = MIs;
1480 LdBases.push_back(Base);
// Same bookkeeping for stores.
1483 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1484 Base2StsMap.find(Base);
1485 if (BI != Base2StsMap.end()) {
1486 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1487 if (Offset == getMemoryOpOffset(BI->second[i])) {
1493 BI->second.push_back(MI);
1495 SmallVector<MachineInstr*, 4> MIs;
1497 Base2StsMap[Base] = MIs;
1498 StBases.push_back(Base);
1503 // Found a duplicate (a base+offset combination that's seen earlier).
1510 // Re-schedule loads.
1511 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1512 unsigned Base = LdBases[i];
1513 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1515 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1518 // Re-schedule stores.
1519 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1520 unsigned Base = StBases[i];
1521 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1523 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset the per-region bookkeeping before continuing.
1527 Base2LdsMap.clear();
1528 Base2StsMap.clear();
1538 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1539 /// optimization pass.
// PreAlloc selects the pre-register-allocation variant; otherwise the
// post-allocation ARMLoadStoreOpt pass is created.
1540 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1542 return new ARMPreAllocLoadStoreOpt();
1543 return new ARMLoadStoreOpt();