//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin, unsigned memOpsEnd,
                        unsigned insertAfter, int Offset,
                        unsigned Base, bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDRi12:   return ARM::LDM;
  case ARM::STR:      return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12: return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12: return ARM::t2STM;
  case ARM::VLDRS:    return ARM::VLDMS;
  case ARM::VSTRS:    return ARM::VSTMS;
  case ARM::VLDRD:    return ARM::VLDMD;
  case ARM::VSTRD:    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
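///
/// For example (illustrative; the actual registers and offsets depend on the
/// ops being merged):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]
/// =>
///   ldmia r0, {r1, r2, r3}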
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase = 0;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred,
                                     unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;

  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
      else
        KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // Register is killed before and it's not easy / possible to update the
      // kill marker on already merged instructions. Abort.
      return;

    // If we are inserting the merged operation after an unmerged operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
        memOps[j].isKill = false;
      }
    }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
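///
/// For example (illustrative), given a queue of loads off the same base:
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #12]
/// the 8-byte gap before the third load ends the run, so only the first two
/// are merged and the routine recurses on the remainder of the queue.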
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
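/// isMatchingDecrement - Return true if the given instruction is a decrement
/// of Base by Bytes under the same predicate, e.g. (illustrative)
/// "sub rn, rn, #12" next to a 12-byte multiple transfer on rn, so callers
/// can fold it into the transfer as a base update.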
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return 0;
  case ARM::LDRi12:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
  case ARM::VLDMS:
  case ARM::VSTMS:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMD:
  case ARM::VSTMD:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).getReg() == Base)
      return false;
  }
  Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // If the update instruction is the one the main loop's iterator points
      // at, advance the iterator past it before erasing it.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode))
    .addImm(Pred).addReg(PredReg);
  // Transfer the rest of operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDRi12: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDRi12: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
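///
/// For example (illustrative; subject to the addressing-mode and offset-range
/// checks below):
///
/// ldr rd, [rn]
/// rn := rn + 4;
/// =>
/// ldr rd, [rn], #4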
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STR);
  // FIXME: This special handling of LDRi12 is hackery until all of the ARM
  // LDR/STR insns are moved away from the addrmode2 mega-instruction to
  // the split (LDRi12/LDRrs) style instructions.
  if (Opcode == ARM::LDRi12 || isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM2 && Opcode != ARM::LDRi12
      && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
                                   ARM_AM::db : ARM_AM::ia);
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::LDRi12:
    return MI->getOperand(1).isReg();
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
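/// FixInvalidRegPairOp - An LDRD / STRD needs its two register operands to be
/// an ascending even/odd pair. If they are not, rewrite the instruction as an
/// LDM / STM or as two separate LDR / STR instructions. For example
/// (illustrative; r1/r2 is not a valid even/odd pair):
///   ldrd r1, r2, [r0]
/// =>
///   ldr r1, [r0]
///   ldr r2, [r0, #4]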
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
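///
/// For example (illustrative), combined with the base-update folding of
/// MergeBaseUpdateLSMultiple:
///   str r1, [r0]
///   str r2, [r0, #4]
///   add r0, r0, #8
/// =>
///   stmia r0!, {r1, r2}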
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely they will be combined later.
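///
/// For example (illustrative):
///   ldr r4, [r0]
///   add r7, r7, #1
///   ldr r5, [r0, #4]
/// =>
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   add r7, r7, #1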
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
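/// IsSafeAndProfitableToMove - Return true if the loads / stores being
/// rescheduled can be moved past the instructions between them: reject motion
/// across calls, terminators, conflicting memory accesses, or defs of the
/// base register, and give up if it would add too much register pressure.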
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
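/// CanFormLdStDWord - Return true if the two given loads / stores can be
/// combined into a single LDRD / STRD, computing the new opcode, registers,
/// and offset on success. For example (illustrative):
///   ldr r0, [r2]
///   ldr r1, [r2, #4]
/// =>
///   ldrd r0, r1, [r2]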
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 && Opcode != ARM::LDRi12 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2 && Opcode != ARM::LDRi12)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
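/// RescheduleOps - Move loads / stores of the same base register close to
/// each other so that consecutive pairs can be turned into LDRD / STRD here
/// (when profitable) or into LDM / STM by the post-allocation pass.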
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // For now, we're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s. The old code
            // was just being paranoid in allowing for anything else.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}