1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
// Pass-wide statistics counters (reported with -stats); each STATISTIC is
// registered under DEBUG_TYPE "arm-ldst-opt".
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMLoadStoreOpt - Post-register-allocation pass that combines multiple
55 /// load / store instructions to form ldm / stm instructions.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Target hooks cached for the duration of the pass (presumably initialized
// in runOnMachineFunction — the initialization is not visible here).
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
// MemOpQueueEntry - One candidate load/store for merging: its immediate
// offset, transfer register, kill state, and position (instruction index
// within the block), plus an iterator to the instruction. The Merged flag
// starts false and marks entries already folded into a multiple op.
75 struct MemOpQueueEntry {
80 MachineBasicBlock::iterator MBBI;
82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
83 MachineBasicBlock::iterator i)
84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
87 typedef MemOpQueue::iterator MemOpQueueIter;
// Worker methods; see the definitions below for their contracts.
89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
90 int Offset, unsigned Base, bool BaseKill, int Opcode,
91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
93 void MergeOpsUpdate(MachineBasicBlock &MBB,
102 ARMCC::CondCodes Pred,
106 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
108 int Opcode, unsigned Size,
109 ARMCC::CondCodes Pred, unsigned PredReg,
110 unsigned Scratch, MemOpQueue &MemOps,
111 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator &MBBI);
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
117 MachineBasicBlock::iterator MBBI,
118 const TargetInstrInfo *TII,
120 MachineBasicBlock::iterator &I);
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
124 MachineBasicBlock::iterator &I);
125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification (address is the unique ID, value is unused).
128 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single-register load/store opcode plus
// an addressing submode to the corresponding load/store-multiple opcode.
// ARM-mode LDM/STM support all four submodes (ia/da/db/ib); the Thumb2 and
// VFP variants below only have ia and db forms.
131 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
133   default: llvm_unreachable("Unhandled opcode!");
137     default: llvm_unreachable("Unhandled submode!");
138     case ARM_AM::ia: return ARM::LDMIA;
139     case ARM_AM::da: return ARM::LDMDA;
140     case ARM_AM::db: return ARM::LDMDB;
141     case ARM_AM::ib: return ARM::LDMIB;
147     default: llvm_unreachable("Unhandled submode!");
148     case ARM_AM::ia: return ARM::STMIA;
149     case ARM_AM::da: return ARM::STMDA;
150     case ARM_AM::db: return ARM::STMDB;
151     case ARM_AM::ib: return ARM::STMIB;
158     default: llvm_unreachable("Unhandled submode!");
159     case ARM_AM::ia: return ARM::t2LDMIA;
160     case ARM_AM::db: return ARM::t2LDMDB;
167     default: llvm_unreachable("Unhandled submode!");
168     case ARM_AM::ia: return ARM::t2STMIA;
169     case ARM_AM::db: return ARM::t2STMDB;
175     default: llvm_unreachable("Unhandled submode!");
176     case ARM_AM::ia: return ARM::VLDMSIA;
177     case ARM_AM::db: return ARM::VLDMSDB;
183     default: llvm_unreachable("Unhandled submode!");
184     case ARM_AM::ia: return ARM::VSTMSIA;
185     case ARM_AM::db: return ARM::VSTMSDB;
191     default: llvm_unreachable("Unhandled submode!");
192     case ARM_AM::ia: return ARM::VLDMDIA;
193     case ARM_AM::db: return ARM::VLDMDDB;
199     default: llvm_unreachable("Unhandled submode!");
200     case ARM_AM::ia: return ARM::VSTMDIA;
201     case ARM_AM::db: return ARM::VSTMDDB;
// getLoadStoreMultipleSubMode - Inverse of the mapping above: given a
// load/store-multiple opcode (including _RET and base-updating _UPD forms),
// return the addressing submode it uses. The visible IA cases are followed
// by the DB cases; unknown opcodes fall through to bad_am_submode.
212 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
214   default: llvm_unreachable("Unhandled opcode!");
220 case ARM::t2LDMIA_RET:
222 case ARM::t2LDMIA_UPD:
224 case ARM::t2STMIA_UPD:
226 case ARM::VLDMSIA_UPD:
228 case ARM::VSTMSIA_UPD:
230 case ARM::VLDMDIA_UPD:
232 case ARM::VSTMDIA_UPD:
246 case ARM::t2LDMDB_UPD:
248 case ARM::t2STMDB_UPD:
250 case ARM::VLDMSDB_UPD:
252 case ARM::VSTMSDB_UPD:
254 case ARM::VLDMDDB_UPD:
256 case ARM::VSTMDDB_UPD:
266 return ARM_AM::bad_am_submode;
269 } // end namespace ARM_AM
270 } // end namespace llvm
272 static bool isT2i32Load(unsigned Opc) {
273 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
276 static bool isi32Load(unsigned Opc) {
277 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
280 static bool isT2i32Store(unsigned Opc) {
281 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
284 static bool isi32Store(unsigned Opc) {
285 return Opc == ARM::STRi12 || isT2i32Store(Opc);
288 /// MergeOps - Create and insert a LDM or STM with Base as base register and
289 /// registers in Regs as the register operands that would be loaded / stored.
290 /// It returns true if the transformation is done.
292 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
293 MachineBasicBlock::iterator MBBI,
294 int Offset, unsigned Base, bool BaseKill,
295 int Opcode, ARMCC::CondCodes Pred,
296 unsigned PredReg, unsigned Scratch, DebugLoc dl,
297 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
298 // Only a single register to load / store. Don't bother.
299 unsigned NumRegs = Regs.size();
// Pick the addressing submode implied by the starting offset: +4 => IB,
// -4*(n-1) => DA, -4*n => DB, 0 => the default IA.
303 ARM_AM::AMSubMode Mode = ARM_AM::ia;
304 // VFP and Thumb2 do not support IB or DA modes.
305 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
306 bool haveIBAndDA = isNotVFP && !isThumb2;
307 if (Offset == 4 && haveIBAndDA)
309 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
311 else if (Offset == -4 * (int)NumRegs && isNotVFP)
312 // VLDM/VSTM do not support DB mode without also updating the base reg.
314 else if (Offset != 0) {
315 // If starting offset isn't zero, insert a MI to materialize a new base.
316 // But only do so if it is cost effective, i.e. merging more than two
322 if (isi32Load(Opcode))
323 // If it is a load, then just use one of the destination register to
324 // use as the new base.
325 NewBase = Regs[NumRegs-1].first;
327 // Use the scratch register to use as a new base.
// Select ADD or SUB (ARM vs Thumb2, SP-relative vs regular) to compute
// Base +/- Offset into NewBase.
332 int BaseOpc = !isThumb2
334 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
338 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
// The offset must be encodable as an immediate of the chosen add/sub.
341 int ImmedOffset = isThumb2
342 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
343 if (ImmedOffset == -1)
344 // FIXME: Try t2ADDri12 or t2SUBri12?
345 return false; // Probably not worth it then.
347 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
348 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
349 .addImm(Pred).addReg(PredReg).addReg(0);
351 BaseKill = true; // The new base is always killed right after its use.
// Build the LDM/STM itself: base, predicate, then one register operand per
// merged memop (defs for loads, uses for stores, with kill state carried).
354 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
355 Opcode == ARM::VLDRD);
356 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
357 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
358 .addReg(Base, getKillRegState(BaseKill))
359 .addImm(Pred).addReg(PredReg);
360 for (unsigned i = 0; i != NumRegs; ++i)
361 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
362 | getKillRegState(Regs[i].second));
367 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success. Entries [memOpsBegin, memOpsEnd) of memOps are merged into one
// multiple op inserted after memOps[insertAfter]; kill flags on unmerged
// memops are transferred or cleared so liveness stays consistent.
369 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
371 unsigned memOpsBegin, unsigned memOpsEnd,
372 unsigned insertAfter, int Offset,
373 unsigned Base, bool BaseKill,
375 ARMCC::CondCodes Pred, unsigned PredReg,
378 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
379 // First calculate which of the registers should be killed by the merged
381 const unsigned insertPos = memOps[insertAfter].Position;
// Registers killed before insertPos by an already-merged memop cannot have
// their kill markers fixed up, so they make the merge impossible
// (UnavailRegs); kills from unmerged memops are just recorded (KilledRegs).
383 SmallSet<unsigned, 4> UnavailRegs;
384 SmallSet<unsigned, 4> KilledRegs;
385 DenseMap<unsigned, unsigned> Killer;
386 for (unsigned i = 0; i < memOpsBegin; ++i) {
387 if (memOps[i].Position < insertPos && memOps[i].isKill) {
388 unsigned Reg = memOps[i].Reg;
389 if (memOps[i].Merged)
390 UnavailRegs.insert(Reg);
392 KilledRegs.insert(Reg);
// Also scan memops after the merged range that sit before the insert point.
397 for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
398 if (memOps[i].Position < insertPos && memOps[i].isKill) {
399 unsigned Reg = memOps[i].Reg;
400 KilledRegs.insert(Reg);
// Collect the (register, isKill) pairs for the merged instruction.
405 SmallVector<std::pair<unsigned, bool>, 8> Regs;
406 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
407 unsigned Reg = memOps[i].Reg;
408 if (UnavailRegs.count(Reg))
409 // Register is killed before and it's not easy / possible to update the
410 // kill marker on already merged instructions. Abort.
413 // If we are inserting the merged operation after an unmerged operation that
414 // uses the same register, make sure to transfer any kill flag.
415 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
416 Regs.push_back(std::make_pair(Reg, isKill));
419 // Try to do the merge.
420 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
422 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
423 Pred, PredReg, Scratch, dl, Regs))
426 // Merge succeeded, update records.
427 Merges.push_back(prior(Loc));
428 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
429 // Remove kill flags from any unmerged memops that come before insertPos.
430 if (Regs[i-memOpsBegin].second) {
431 unsigned Reg = Regs[i-memOpsBegin].first;
432 if (KilledRegs.count(Reg)) {
433 unsigned j = Killer[Reg];
434 memOps[j].MBBI->getOperand(0).setIsKill(false);
435 memOps[j].isKill = false;
// Erase the now-merged single load/store and mark its queue entry.
438 MBB.erase(memOps[i].MBBI);
439 memOps[i].Merged = true;
443 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
444 /// load / store multiple instructions.
// Walks MemOps starting at SIndex, greedily extending a run of memops with
// consecutive offsets (and, register-number constraints permitting) and
// recursing on the remainder when a memop cannot join the current run.
446 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
447 unsigned Base, int Opcode, unsigned Size,
448 ARMCC::CondCodes Pred, unsigned PredReg,
449 unsigned Scratch, MemOpQueue &MemOps,
450 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
451 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
452 int Offset = MemOps[SIndex].Offset;
453 int SOffset = Offset;
454 unsigned insertAfter = SIndex;
455 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
456 DebugLoc dl = Loc->getDebugLoc();
// PRegNum tracks the register number of the previous memop in the run;
// undef operands get UINT_MAX so they never satisfy the ordering checks.
457 const MachineOperand &PMO = Loc->getOperand(0);
458 unsigned PReg = PMO.getReg();
459 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
460 : getARMRegisterNumbering(PReg);
463 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
464 int NewOffset = MemOps[i].Offset;
465 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
466 unsigned Reg = MO.getReg();
467 unsigned RegNum = MO.isUndef() ? UINT_MAX
468 : getARMRegisterNumbering(Reg);
469 // Register numbers must be in ascending order. For VFP, the registers
470 // must also be consecutive and there is a limit of 16 double-word
471 // registers per instruction.
472 if (Reg != ARM::SP &&
473 NewOffset == Offset + (int)Size &&
474 ((isNotVFP && RegNum > PRegNum)
475 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
480 // Can't merge this in. Try merge the earlier ones first.
481 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
482 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
483 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
// Remember the latest position so the merged op is inserted after every
// memop it replaces.
488 if (MemOps[i].Position > MemOps[insertAfter].Position)
// All remaining memops fit in one run; merge them, killing the base if its
// last use is here.
492 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
493 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
494 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// isMatchingDecrement - Return true if MI is "Base = Base - Bytes" under the
// same predicate: a SUB-immediate (ARM, Thumb2, or Thumb1 SP form) whose
// source and destination are both Base and whose scaled immediate equals
// Bytes. Limit (when non-zero) bounds the encodable offset.
498 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
499 unsigned Bytes, unsigned Limit,
500 ARMCC::CondCodes Pred, unsigned PredReg){
501 unsigned MyPredReg = 0;
504 if (MI->getOpcode() != ARM::t2SUBri &&
505 MI->getOpcode() != ARM::t2SUBrSPi &&
506 MI->getOpcode() != ARM::t2SUBrSPi12 &&
507 MI->getOpcode() != ARM::tSUBspi &&
508 MI->getOpcode() != ARM::SUBri)
511 // Make sure the offset fits in 8 bits.
512 if (Bytes == 0 || (Limit && Bytes >= Limit))
// tSUBspi encodes its immediate in words, so scale by 4 before comparing.
515 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
516 return (MI->getOperand(0).getReg() == Base &&
517 MI->getOperand(1).getReg() == Base &&
518 (MI->getOperand(2).getImm()*Scale) == Bytes &&
519 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
520 MyPredReg == PredReg);
// isMatchingIncrement - Mirror of isMatchingDecrement: return true if MI is
// "Base = Base + Bytes" under the same predicate, using one of the
// ADD-immediate forms (ARM, Thumb2, or Thumb1 SP form).
523 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
524 unsigned Bytes, unsigned Limit,
525 ARMCC::CondCodes Pred, unsigned PredReg){
526 unsigned MyPredReg = 0;
529 if (MI->getOpcode() != ARM::t2ADDri &&
530 MI->getOpcode() != ARM::t2ADDrSPi &&
531 MI->getOpcode() != ARM::t2ADDrSPi12 &&
532 MI->getOpcode() != ARM::tADDspi &&
533 MI->getOpcode() != ARM::ADDri)
536 if (Bytes == 0 || (Limit && Bytes >= Limit))
537 // Make sure the offset fits in 8 bits.
// tADDspi encodes its immediate in words, so scale by 4 before comparing.
540 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
541 return (MI->getOperand(0).getReg() == Base &&
542 MI->getOperand(1).getReg() == Base &&
543 (MI->getOperand(2).getImm()*Scale) == Bytes &&
544 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
545 MyPredReg == PredReg);
// getLSMultipleTransferSize - Return the number of bytes MI transfers.
// For the multiple-register cases visible below, the register list length is
// recovered as (actual operands - fixed operands + 1), times 4 bytes for
// single-word registers or 8 for double-word registers.
548 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
549   switch (MI->getOpcode()) {
579     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
584     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
// getUpdatingLSMultipleOpcode - Map a non-writeback load/store-multiple
// opcode plus submode to its base-updating (writeback, "_UPD") counterpart.
// As with getLoadStoreMultipleOpcode, Thumb2 and VFP only have ia/db forms.
588 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
589 ARM_AM::AMSubMode Mode) {
591 default: llvm_unreachable("Unhandled opcode!");
597 default: llvm_unreachable("Unhandled submode!");
598 case ARM_AM::ia: return ARM::LDMIA_UPD;
599 case ARM_AM::ib: return ARM::LDMIB_UPD;
600 case ARM_AM::da: return ARM::LDMDA_UPD;
601 case ARM_AM::db: return ARM::LDMDB_UPD;
609 default: llvm_unreachable("Unhandled submode!");
610 case ARM_AM::ia: return ARM::STMIA_UPD;
611 case ARM_AM::ib: return ARM::STMIB_UPD;
612 case ARM_AM::da: return ARM::STMDA_UPD;
613 case ARM_AM::db: return ARM::STMDB_UPD;
619 default: llvm_unreachable("Unhandled submode!");
620 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
621 case ARM_AM::db: return ARM::t2LDMDB_UPD;
627 default: llvm_unreachable("Unhandled submode!");
628 case ARM_AM::ia: return ARM::t2STMIA_UPD;
629 case ARM_AM::db: return ARM::t2STMDB_UPD;
635 default: llvm_unreachable("Unhandled submode!");
636 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
637 case ARM_AM::db: return ARM::VLDMSDB_UPD;
643 default: llvm_unreachable("Unhandled submode!");
644 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
645 case ARM_AM::db: return ARM::VLDMDDB_UPD;
651 default: llvm_unreachable("Unhandled submode!");
652 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
653 case ARM_AM::db: return ARM::VSTMSDB_UPD;
659 default: llvm_unreachable("Unhandled submode!");
660 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
661 case ARM_AM::db: return ARM::VSTMDDB_UPD;
669 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
670 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
672 /// stmia rn, <ra, rb, rc>
673 /// rn := rn + 4 * 3;
675 /// stmia rn!, <ra, rb, rc>
677 /// rn := rn - 4 * 3;
678 /// ldmia rn, <ra, rb, rc>
680 /// ldmdb rn!, <ra, rb, rc>
681 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
682 MachineBasicBlock::iterator MBBI,
684 MachineBasicBlock::iterator &I) {
685 MachineInstr *MI = MBBI;
686 unsigned Base = MI->getOperand(0).getReg();
687 bool BaseKill = MI->getOperand(0).isKill();
688 unsigned Bytes = getLSMultipleTransferSize(MI);
689 unsigned PredReg = 0;
690 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
691 int Opcode = MI->getOpcode();
692 DebugLoc dl = MI->getDebugLoc();
694 // Can't use an updating ld/st if the base register is also a dest
695 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
696 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
697 if (MI->getOperand(i).getReg() == Base)
700 bool DoMerge = false;
701 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
703 // Try merging with the previous instruction.
704 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
705 if (MBBI != BeginMBBI) {
706 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over DBG_VALUEs so debug info doesn't block the fold.
707 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
709 if (Mode == ARM_AM::ia &&
710 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
713 } else if (Mode == ARM_AM::ib &&
714 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
722 // Try merging with the next instruction.
723 MachineBasicBlock::iterator EndMBBI = MBB.end();
724 if (!DoMerge && MBBI != EndMBBI) {
725 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
726 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
728 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
729 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
731 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
732 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Rebuild the instruction as the writeback form: the base is now also
// defined (the updated address), then the original operands are copied over.
747 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
748 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
749 .addReg(Base, getDefRegState(true)) // WB base register
750 .addReg(Base, getKillRegState(BaseKill))
751 .addImm(Pred).addReg(PredReg);
753 // Transfer the rest of operands.
754 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
755 MIB.addOperand(MI->getOperand(OpNum));
757 // Transfer memoperands.
758 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// getPreIndexedLoadStoreOpcode - Map a single load/store opcode to its
// pre-indexed (writeback-before-access) form. VLDR/VSTR have no pre-indexed
// form, so the VFP cases use the one-register _UPD multiple instead, picking
// IA or DB by the add/sub direction.
764 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
765 ARM_AM::AddrOpc Mode) {
772 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
774 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
776 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
778 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
781 return ARM::t2LDR_PRE;
784 return ARM::t2STR_PRE;
785 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a single load/store opcode to its
// post-indexed (writeback-after-access) form; like the pre-indexed mapping,
// VFP cases fall back to one-register _UPD multiples.
790 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
791 ARM_AM::AddrOpc Mode) {
794 return ARM::LDR_POST;
796 return ARM::STR_POST;
798 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
800 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
802 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
804 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
807 return ARM::t2LDR_POST;
810 return ARM::t2STR_POST;
811 default: llvm_unreachable("Unhandled opcode!");
816 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
817 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
818 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
819 MachineBasicBlock::iterator MBBI,
820 const TargetInstrInfo *TII,
822 MachineBasicBlock::iterator &I) {
823 MachineInstr *MI = MBBI;
824 unsigned Base = MI->getOperand(1).getReg();
825 bool BaseKill = MI->getOperand(1).isKill();
826 unsigned Bytes = getLSMultipleTransferSize(MI);
827 int Opcode = MI->getOpcode();
828 DebugLoc dl = MI->getDebugLoc();
// Classify the addressing mode: AM5 = VFP load/store, AM2 = ARM-mode
// integer LDR/STR with 12-bit offset.
829 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
830 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
831 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset accesses can absorb a base update.
832 if (isi32Load(Opcode) || isi32Store(Opcode))
833 if (MI->getOperand(2).getImm() != 0)
835 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
838 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
839 // Can't do the merge if the destination register is the same as the would-be
840 // writeback register.
841 if (isLd && MI->getOperand(0).getReg() == Base)
844 unsigned PredReg = 0;
845 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
846 bool DoMerge = false;
847 ARM_AM::AddrOpc AddSub = ARM_AM::add;
849 // AM2 - 12 bits, thumb2 - 8 bits.
850 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
852 // Try merging with the previous instruction.
853 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
854 if (MBBI != BeginMBBI) {
855 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over DBG_VALUEs so debug info doesn't block the fold.
856 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
858 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
860 AddSub = ARM_AM::sub;
862 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
866 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
871 // Try merging with the next instruction.
872 MachineBasicBlock::iterator EndMBBI = MBB.end();
873 if (!DoMerge && MBBI != EndMBBI) {
874 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
875 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
878 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
880 AddSub = ARM_AM::sub;
881 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
885 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
// Encode the absorbed increment/decrement as the new offset operand.
899 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
901 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
904 // VLDM[SD}_UPD, VSTM[SD]_UPD
905 // (There are no base-updating versions of VLDR/VSTR instructions, but the
906 // updating load/store-multiple instructions can be used with only one
908 MachineOperand &MO = MI->getOperand(0);
909 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
910 .addReg(Base, getDefRegState(true)) // WB base register
911 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
912 .addImm(Pred).addReg(PredReg)
913 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
914 getKillRegState(MO.isKill())));
917 // LDR_PRE, LDR_POST,
918 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
919 .addReg(Base, RegState::Define)
920 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
922 // t2LDR_PRE, t2LDR_POST
923 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
924 .addReg(Base, RegState::Define)
925 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Store cases: the stored register comes first, writeback base is NewOpc's
// explicit destination.
927 MachineOperand &MO = MI->getOperand(0);
930 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
931 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
932 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
934 // t2STR_PRE, t2STR_POST
935 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
936 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
937 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
944 /// isMemoryOp - Returns true if instruction is a memory operations (that this
945 /// pass is capable of operating on).
946 static bool isMemoryOp(const MachineInstr *MI) {
947 // When no memory operands are present, conservatively assume unaligned,
948 // volatile, unfoldable.
949 if (!MI->hasOneMemOperand())
952 const MachineMemOperand *MMO = *MI->memoperands_begin();
954 // Don't touch volatile memory accesses - we may be changing their order.
955 if (MMO->isVolatile())
958 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
960 if (MMO->getAlignment() < 4)
963 // str <undef> could probably be eliminated entirely, but for now we just want
964 // to avoid making a mess of it.
965 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
966 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
967 MI->getOperand(0).isUndef())
970 // Likewise don't mess with references to undefined addresses.
971 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
972 MI->getOperand(1).isUndef())
// Finally, accept only the opcodes this pass merges, and only their
// register-base forms.
975 int Opcode = MI->getOpcode();
980 return MI->getOperand(1).isReg();
983 return MI->getOperand(1).isReg();
990 return MI->getOperand(1).isReg();
995 /// AdvanceRS - Advance register scavenger to just before the earliest memory
996 /// op that is being merged.
997 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the memop with the smallest Position — the earliest in the block.
998 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
999 unsigned Position = MemOps[0].Position;
1000 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1001 if (MemOps[i].Position < Position) {
1002 Position = MemOps[i].Position;
1003 Loc = MemOps[i].MBBI;
// Walk the scavenger forward to the instruction just before Loc (no-op when
// Loc is the first instruction of the block).
1007 if (Loc != MBB.begin())
1008 RS->forward(prior(Loc));
// getMemoryOpOffset - Decode MI's immediate offset (in bytes, signed) from
// its addressing-mode-specific offset field. Plain i12/i8 forms store the
// byte offset directly; AM3 (LDRD/STRD) and AM5 (VFP) encode magnitude and
// add/sub direction, with AM5 offsets scaled by 4.
1011 static int getMemoryOpOffset(const MachineInstr *MI) {
1012 int Opcode = MI->getOpcode();
1013 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1014 unsigned NumOperands = MI->getDesc().getNumOperands();
1015 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1017 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1018 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1019 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1020 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1023 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1024 : ARM_AM::getAM5Offset(OffField) * 4;
1026 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1029 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit a single LDR (isDef=true) or STR (isDef=false) of Reg
// at Offset from BaseReg, preserving the given kill/undef flags and
// predication. Used by FixInvalidRegPairOp to split an LDRD/STRD into two
// single-word accesses.
1035 static void InsertLDR_STR(MachineBasicBlock &MBB,
1036 MachineBasicBlock::iterator &MBBI,
1037 int Offset, bool isDef,
1038 DebugLoc dl, unsigned NewOpc,
1039 unsigned Reg, bool RegDeadKill, bool RegUndef,
1040 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1041 bool OffKill, bool OffUndef,
1042 ARMCC::CondCodes Pred, unsigned PredReg,
1043 const TargetInstrInfo *TII, bool isT2) {
// Load: Reg is a def (possibly dead).
1045 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1047 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1048 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1049 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store: Reg is a use (possibly killed or undef).
1051 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1053 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1054 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1055 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair is not a
// valid even/odd pair: turn it into an LDM/STM when the register numbers
// ascend and the offset is zero, otherwise split it into two single-word
// loads/stores. Returns true if MI was replaced (MBBI is updated).
1059 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1060 MachineBasicBlock::iterator &MBBI) {
1061 MachineInstr *MI = &*MBBI;
1062 unsigned Opcode = MI->getOpcode();
1063 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1064 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1065 unsigned EvenReg = MI->getOperand(0).getReg();
1066 unsigned OddReg = MI->getOperand(1).getReg();
1067 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1068 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// A proper even/odd consecutive pair is already valid — nothing to fix.
1069 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
1072 MachineBasicBlock::iterator NewBBI = MBBI;
1073 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1074 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1075 bool EvenDeadKill = isLd ?
1076 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1077 bool EvenUndef = MI->getOperand(0).isUndef();
1078 bool OddDeadKill = isLd ?
1079 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1080 bool OddUndef = MI->getOperand(1).isUndef();
1081 const MachineOperand &BaseOp = MI->getOperand(2);
1082 unsigned BaseReg = BaseOp.getReg();
1083 bool BaseKill = BaseOp.isKill();
1084 bool BaseUndef = BaseOp.isUndef();
1085 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1086 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1087 int OffImm = getMemoryOpOffset(MI);
1088 unsigned PredReg = 0;
1089 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1091 if (OddRegNum > EvenRegNum && OffImm == 0) {
1092 // Ascending register numbers and no offset. It's safe to change it to a
1094 unsigned NewOpc = (isLd)
1095 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1096 : (isT2 ? ARM::t2STMIA : ARM::STMIA)
1098 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1099 .addReg(BaseReg, getKillRegState(BaseKill))
1100 .addImm(Pred).addReg(PredReg)
1101 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1102 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store variant: both registers are uses with kill/undef state carried.
1105 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1106 .addReg(BaseReg, getKillRegState(BaseKill))
1107 .addImm(Pred).addReg(PredReg)
1109 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1111 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1114 NewBBI = llvm::prior(MBBI);
1116 // Split into two instructions.
1117 unsigned NewOpc = (isLd)
1118 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1119 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1120 DebugLoc dl = MBBI->getDebugLoc();
1121 // If this is a load and base register is killed, it may have been
1122 // re-defed by the load, make sure the first load does not clobber it.
1124 (BaseKill || OffKill) &&
1125 (TRI->regsOverlap(EvenReg, BaseReg))) {
1126 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the odd half first so the even half (which overlaps the base)
// executes last and cannot clobber the address early.
1127 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1128 OddReg, OddDeadKill, false,
1129 BaseReg, false, BaseUndef, false, OffUndef,
1130 Pred, PredReg, TII, isT2);
1131 NewBBI = llvm::prior(MBBI);
1132 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1133 EvenReg, EvenDeadKill, false,
1134 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1135 Pred, PredReg, TII, isT2);
1137 if (OddReg == EvenReg && EvenDeadKill) {
1138 // If the two source operands are the same, the kill marker is
1139 // probably on the first one. e.g.
1140 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1141 EvenDeadKill = false;
// Normal order: even half at OffImm, odd half at OffImm+4.
1144 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1145 EvenReg, EvenDeadKill, EvenUndef,
1146 BaseReg, false, BaseUndef, false, OffUndef,
1147 Pred, PredReg, TII, isT2);
1148 NewBBI = llvm::prior(MBBI);
1149 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1150 OddReg, OddDeadKill, OddUndef,
1151 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1152 Pred, PredReg, TII, isT2);
1167 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1168 /// ops of the same base and incrementing offset into LDM / STM ops.
1169 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1170 unsigned NumMerges = 0;
1171 unsigned NumMemOps = 0;
// Current-chain state: all memory ops queued in MemOps must share this base
// register, transfer size, and predicate before they may be merged.
1173 unsigned CurrBase = 0;
1175 unsigned CurrSize = 0;
1176 ARMCC::CondCodes CurrPred = ARMCC::AL;
1177 unsigned CurrPredReg = 0;
1178 unsigned Position = 0;
1179 SmallVector<MachineBasicBlock::iterator,4> Merges;
// The register scavenger tracks liveness so a free scratch register can be
// found later if the merge needs a new base register.
1181 RS->enterBasicBlock(&MBB);
1182 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// Illegal LDRD/STRD register pairs are split into separate loads/stores
// before any merging is attempted.
1184 if (FixInvalidRegPairOp(MBB, MBBI))
1187 bool Advance = false;
1188 bool TryMerge = false;
1189 bool Clobber = false;
1191 bool isMemOp = isMemoryOp(MBBI);
// Decode the candidate: opcode, transfer size, destination/source register
// (operand 0), base register (operand 1), predicate, and immediate offset.
1193 int Opcode = MBBI->getOpcode();
1194 unsigned Size = getLSMultipleTransferSize(MBBI);
1195 const MachineOperand &MO = MBBI->getOperand(0);
1196 unsigned Reg = MO.getReg();
// A def (load result) never carries a kill marker for our purposes.
1197 bool isKill = MO.isDef() ? false : MO.isKill();
1198 unsigned Base = MBBI->getOperand(1).getReg();
1199 unsigned PredReg = 0;
1200 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1201 int Offset = getMemoryOpOffset(MBBI);
// Watch out for a load that redefines its own base register, e.g.:
1204 // r5 := ldr [r5, #4]
1205 // r6 := ldr [r5, #8]
1207 // The second ldr has effectively broken the chain even though it
1208 // looks like the later ldr(s) use the same base register. Try to
1209 // merge the ldr's so far, including this one. But don't try to
1210 // combine the following ldr(s).
1211 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1212 if (CurrBase == 0 && !Clobber) {
1213 // Start of a new chain.
1218 CurrPredReg = PredReg;
1219 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
// Same base/opcode/predicate as the current chain: extend the queue,
// keeping it sorted by offset.
1228 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1229 // No need to match PredReg.
1230 // Continue adding to the queue.
1231 if (Offset > MemOps.back().Offset) {
1232 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise find the sorted insertion point for this offset.
1237 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1239 if (Offset < I->Offset) {
1240 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1245 } else if (Offset == I->Offset) {
1246 // Collision! This can't be merged!
// Debug values never break or extend a chain.
1255 if (MBBI->isDebugValue()) {
1258 // Reach the end of the block, try merging the memory instructions.
1260 } else if (Advance) {
1264 // Reach the end of the block, try merging the memory instructions.
// Merge phase: with more than one queued op, emit an LDM/STM.
1270 if (NumMemOps > 1) {
1271 // Try to find a free register to use as a new base in case it's needed.
1272 // First advance to the instruction just before the start of the chain.
1273 AdvanceRS(MBB, MemOps);
1274 // Find a scratch register.
1275 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1276 // Process the load / store instructions.
1277 RS->forward(prior(MBBI));
1281 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1282 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1284 // Try folding preceeding/trailing base inc/dec into the generated
1286 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1287 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1289 NumMerges += Merges.size();
1291 // Try folding preceeding/trailing base inc/dec into those load/store
1292 // that were not merged to form LDM/STM ops.
1293 for (unsigned i = 0; i != NumMemOps; ++i)
1294 if (!MemOps[i].Merged)
1295 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1298 // RS may be pointing to an instruction that's deleted.
1299 RS->skipTo(prior(MBBI));
1300 } else if (NumMemOps == 1) {
1301 // Try folding preceeding/trailing base inc/dec into the single
1303 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1305 RS->forward(prior(MBBI));
// Reset chain state so the scan can start a fresh chain.
1312 CurrPred = ARMCC::AL;
1319 // If iterator hasn't been advanced and this is not a memory op, skip it.
1320 // It can't start a new chain anyway.
1321 if (!Advance && !isMemOp && MBBI != E) {
// Report whether any LDM/STM was formed.
1327 return NumMerges > 0;
// Comparator used to sort load/store instructions by DECREASING memory
// offset (note the '>' — callers rely on reverse order). Two distinct
// instructions are expected to have distinct offsets; the assert guards
// against duplicate base+offset pairs slipping into the sort.
1331 struct OffsetCompare {
1332 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1333 int LOffset = getMemoryOpOffset(LHS);
1334 int ROffset = getMemoryOpOffset(RHS);
1335 assert(LHS == RHS || LOffset != ROffset);
1336 return LOffset > ROffset;
1341 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1342 /// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
1343 /// directly restore the value of LR into pc.
1344 /// ldmfd sp!, {..., lr}
1347 /// ldmfd sp!, {..., lr}
1350 /// ldmfd sp!, {..., pc}
1351 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1352 if (MBB.empty()) return false;
1354 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
// Only fire when the block ends in one of the recognized return forms and
// there is at least one instruction before it.
1355 if (MBBI != MBB.begin() &&
1356 (MBBI->getOpcode() == ARM::BX_RET ||
1357 MBBI->getOpcode() == ARM::tBX_RET ||
1358 MBBI->getOpcode() == ARM::MOVPCLR)) {
1359 MachineInstr *PrevMI = prior(MBBI);
1360 unsigned Opcode = PrevMI->getOpcode();
// The preceding instruction must be a base-updating load-multiple.
1361 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1362 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1363 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The last register in the LDM's list must be LR, i.e. the value the
// return consumes — otherwise the fold is not applicable.
1364 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1365 if (MO.getReg() != ARM::LR)
// Rewrite the LDM*_UPD into the return-forming LDMIA_RET flavor; only
// the IA variants are expected here (see assert).
1367 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1368 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1369 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1370 PrevMI->setDesc(TII->get(NewOpc));
// Preserve any implicit operands the return instruction carried.
1372 PrevMI->copyImplicitOps(&*MBBI);
// Pass entry point: cache target hooks, then run the LDM/STM merger (and,
// on v5T+ subtargets, the return-into-LDM fold) over every basic block.
1380 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1381 const TargetMachine &TM = Fn.getTarget();
1382 AFI = Fn.getInfo<ARMFunctionInfo>();
1383 TII = TM.getInstrInfo();
1384 TRI = TM.getRegisterInfo();
// Heap-allocated scavenger used by LoadStoreMultipleOpti; presumably freed
// at the end of this function (tail not visible here) — TODO confirm.
1385 RS = new RegScavenger();
1386 isThumb2 = AFI->isThumb2Function();
1388 bool Modified = false;
1389 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1391 MachineBasicBlock &MBB = *MFI;
1392 Modified |= LoadStoreMultipleOpti(MBB);
// MergeReturnIntoLDM is gated on v5T ops per the subtarget query below.
1393 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1394 Modified |= MergeReturnIntoLDM(MBB);
1402 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1403 /// load / stores from consecutive locations close to make it more
1404 /// likely they will be combined later.
1407 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1409 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target/function context, set up in runOnMachineFunction.
1411 const TargetData *TD;
1412 const TargetInstrInfo *TII;
1413 const TargetRegisterInfo *TRI;
1414 const ARMSubtarget *STI;
1415 MachineRegisterInfo *MRI;
1416 MachineFunction *MF;
1418 virtual bool runOnMachineFunction(MachineFunction &Fn);
1420 virtual const char *getPassName() const {
1421 return "ARM pre- register allocation load / store optimization pass";
// Checks whether two adjacent loads/stores can be combined into a single
// LDRD/STRD, returning the new opcode and operands through the references.
1425 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1426 unsigned &NewOpc, unsigned &EvenReg,
1427 unsigned &OddReg, unsigned &BaseReg,
1429 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves same-base loads/stores together (and may form LDRD/STRD pairs).
1431 bool RescheduleOps(MachineBasicBlock *MBB,
1432 SmallVector<MachineInstr*, 4> &Ops,
1433 unsigned Base, bool isLd,
1434 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
// Per-block driver: groups memory ops by base register, then reschedules.
1435 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Static pass ID; its address identifies the pass to the pass manager.
1437 char ARMPreAllocLoadStoreOpt::ID = 0;
// Pass entry point: cache target data/instr/reg info and the subtarget,
// then reschedule loads/stores in every basic block.
1440 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1441 TD = Fn.getTarget().getTargetData();
1442 TII = Fn.getTarget().getInstrInfo();
1443 TRI = Fn.getTarget().getRegisterInfo();
1444 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1445 MRI = &Fn.getRegInfo();
// NOTE(review): the MF member is read by CanFormLdStDWord; its assignment
// is not visible in this excerpt — presumably set here. TODO confirm.
1448 bool Modified = false;
1449 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1451 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Scan the instructions in [I, E) between a
// group of loads/stores (MemOps, all using base register Base and defining
// or using the registers in MemRegs) and decide whether it is both safe
// (no intervening call/terminator/side effect, no redefinition of the
// base) and profitable (bounded register-pressure increase) to move the
// group together.
1456 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1457 MachineBasicBlock::iterator I,
1458 MachineBasicBlock::iterator E,
1459 SmallPtrSet<MachineInstr*, 4> &MemOps,
1460 SmallSet<unsigned, 4> &MemRegs,
1461 const TargetRegisterInfo *TRI) {
1462 // Are there stores / loads / calls between them?
1463 // FIXME: This is overly conservative. We should make use of alias information
// Registers defined in the gap that would stay live across the moved group.
1465 SmallSet<unsigned, 4> AddedRegPressure;
// Debug values and the memory ops themselves are skipped.
1467 if (I->isDebugValue() || MemOps.count(&*I))
1469 const TargetInstrDesc &TID = I->getDesc();
// Calls, terminators, and unmodeled side effects are hard barriers.
1470 if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
// Moving loads across an intervening store is unsafe (no alias info).
1472 if (isLd && TID.mayStore())
1477 // It's not safe to move the first 'str' down.
1480 // str r4, [r0, #+4]
// Inspect each operand: a def that overlaps the base kills the move;
// other defs add to the estimated register pressure.
1484 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1485 MachineOperand &MO = I->getOperand(j);
1488 unsigned Reg = MO.getReg();
1489 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1491 if (Reg != Base && !MemRegs.count(Reg))
1492 AddedRegPressure.insert(Reg);
1496 // Estimate register pressure increase due to the transformation.
1497 if (MemRegs.size() <= 4)
1498 // Ok if we are moving small number of instructions.
// For larger groups, tolerate at most 2x pressure growth.
1500 return AddedRegPressure.size() <= MemRegs.size() * 2;
// CanFormLdStDWord - Decide whether the adjacent memory ops Op0/Op1 can be
// fused into a single LDRD/STRD (or t2LDRDi8/t2STRDi8), and if so fill in
// the new opcode, destination/source register pair, base register, encoded
// offset, predicate, and debug location through the reference parameters.
1504 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1506 unsigned &NewOpc, unsigned &EvenReg,
1507 unsigned &OddReg, unsigned &BaseReg,
1508 int &Offset, unsigned &PredReg,
1509 ARMCC::CondCodes &Pred,
1511 // Make sure we're allowed to generate LDRD/STRD.
1512 if (!STI->hasV5TEOps())
1515 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the single-word opcode of Op0 to its doubleword counterpart.
1517 unsigned Opcode = Op0->getOpcode();
1518 if (Opcode == ARM::LDRi12)
1520 else if (Opcode == ARM::STRi12)
1522 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1523 NewOpc = ARM::t2LDRDi8;
1526 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1527 NewOpc = ARM::t2STRDi8;
1533 // Make sure the base address satisfies i64 ld / st alignment requirement.
// Requires exactly one non-volatile mem operand with a known IR value.
1534 if (!Op0->hasOneMemOperand() ||
1535 !(*Op0->memoperands_begin())->getValue() ||
1536 (*Op0->memoperands_begin())->isVolatile())
1539 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1540 const Function *Func = MF->getFunction();
1541 unsigned ReqAlign = STI->hasV6Ops()
1542 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1543 : 8; // Pre-v6 need 8-byte align
1544 if (Align < ReqAlign)
1547 // Then make sure the immediate offset fits.
1548 int OffImm = getMemoryOpOffset(Op0);
1552 // Can't fall back to t2LDRi8 / t2STRi8.
// Offset must be within the 8-bit (scaled) range and scale-aligned.
1555 int Limit = (1 << 8) * Scale;
1556 if (OffImm >= Limit || (OffImm & (Scale-1)))
// ARM-mode LDRD/STRD uses addrmode3: an add/sub flag plus magnitude.
1561 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1563 AddSub = ARM_AM::sub;
1566 int Limit = (1 << 8) * Scale;
1567 if (OffImm >= Limit || (OffImm & (Scale-1)))
1569 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// The two transfer registers must differ (LDRD with Rt == Rt2 is invalid).
1571 EvenReg = Op0->getOperand(0).getReg();
1572 OddReg = Op1->getOperand(0).getReg();
1573 if (EvenReg == OddReg)
1575 BaseReg = Op0->getOperand(1).getReg();
1576 Pred = llvm::getInstrPredicate(Op0, PredReg);
1577 dl = Op0->getDebugLoc();
// RescheduleOps - Given all loads (or all stores, per isLd) in this block
// that use base register Base, try to move runs of consecutive-offset ops
// next to each other (and, for pairs, fuse them into LDRD/STRD) so the
// post-RA pass can later merge them into LDM/STM. MI2LocMap gives each
// instruction's original position for distance/profitability checks.
1581 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1582 SmallVector<MachineInstr*, 4> &Ops,
1583 unsigned Base, bool isLd,
1584 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1585 bool RetVal = false;
1587 // Sort by offset (in reverse order).
1588 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1590 // The loads / stores of the same base are in order. Scan them from first to
1591 // last and check for the following:
1592 // 1. Any def of base.
// Process Ops in batches: each iteration peels off one run of mergeable ops.
1594 while (Ops.size() > 1) {
1595 unsigned FirstLoc = ~0U;
1596 unsigned LastLoc = 0;
1597 MachineInstr *FirstOp = 0;
1598 MachineInstr *LastOp = 0;
1600 unsigned LastOpcode = 0;
1601 unsigned LastBytes = 0;
1602 unsigned NumMove = 0;
// Walk from the back of Ops (lowest offset first, given the reverse sort)
// collecting a maximal run with matching opcode and strictly consecutive
// offsets, while tracking the earliest/latest block positions touched.
1603 for (int i = Ops.size() - 1; i >= 0; --i) {
1604 MachineInstr *Op = Ops[i];
1605 unsigned Loc = MI2LocMap[Op];
1606 if (Loc <= FirstLoc) {
1610 if (Loc >= LastLoc) {
1615 unsigned Opcode = Op->getOpcode();
1616 if (LastOpcode && Opcode != LastOpcode)
1619 int Offset = getMemoryOpOffset(Op);
1620 unsigned Bytes = getLSMultipleTransferSize(Op);
// The run breaks if the next op is not exactly adjacent in memory.
1622 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1625 LastOffset = Offset;
1627 LastOpcode = Opcode;
1628 if (++NumMove == 8) // FIXME: Tune this limit.
// Record the run's instructions and their transfer registers for the
// safety/profitability analysis below.
1635 SmallPtrSet<MachineInstr*, 4> MemOps;
1636 SmallSet<unsigned, 4> MemRegs;
1637 for (int i = NumMove-1; i >= 0; --i) {
1638 MemOps.insert(Ops[i]);
1639 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1642 // Be conservative, if the instructions are too far apart, don't
1643 // move them. We want to limit the increase of register pressure.
1644 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1646 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1647 MemOps, MemRegs, TRI);
1649 for (unsigned i = 0; i != NumMove; ++i)
1652 // This is the new location for the loads / stores.
// Loads cluster at the first op's position, stores at the last op's, then
// skip past any ops in the run or debug values already at that point.
1653 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1654 while (InsertPos != MBB->end()
1655 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1658 // If we are moving a pair of loads / stores, see if it makes sense
1659 // to try to allocate a pair of registers that can form register pairs.
1660 MachineInstr *Op0 = Ops.back();
1661 MachineInstr *Op1 = Ops[Ops.size()-2];
1662 unsigned EvenReg = 0, OddReg = 0;
1663 unsigned BaseReg = 0, PredReg = 0;
1664 ARMCC::CondCodes Pred = ARMCC::AL;
1666 unsigned NewOpc = 0;
1669 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1670 EvenReg, OddReg, BaseReg,
1671 Offset, PredReg, Pred, isT2)) {
1675 // Form the pair instruction.
1677 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1678 dl, TII->get(NewOpc))
1679 .addReg(EvenReg, RegState::Define)
1680 .addReg(OddReg, RegState::Define)
1682 // FIXME: We're converting from LDRi12 to an insn that still
1683 // uses addrmode2, so we need an explicit offset reg. It should
1684 // always by reg0 since we're transforming LDRi12s.
1687 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store pair (sources rather than defs); same addrmode2 caveat applies.
1690 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1691 dl, TII->get(NewOpc))
1695 // FIXME: We're converting from LDRi12 to an insn that still
1696 // uses addrmode2, so we need an explicit offset reg. It should
1697 // always by reg0 since we're transforming STRi12s.
1700 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1706 // Add register allocation hints to form register pairs.
// Hint the allocator toward an even/odd pair so LDRD/STRD stays legal.
1707 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1708 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// No pair formed: just splice each op in the run to the insertion point.
1710 for (unsigned i = 0; i != NumMove; ++i) {
1711 MachineInstr *Op = Ops.back();
1713 MBB->splice(InsertPos, MBB, Op);
1717 NumLdStMoved += NumMove;
// RescheduleLoadStoreInstrs - Per-block driver. Numbers the instructions
// (MI2LocMap), buckets unpredicated i32/VFP loads and stores by base
// register (Base2LdsMap / Base2StsMap), stops each bucket at barriers or
// at a duplicate base+offset, and hands buckets of 2+ ops to RescheduleOps.
1727 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1728 bool RetVal = false;
1730 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1731 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1732 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
// Bases are also kept in vectors to preserve discovery order for the
// rescheduling loops below (DenseMap iteration order is unspecified).
1733 SmallVector<unsigned, 4> LdBases;
1734 SmallVector<unsigned, 4> StBases;
1737 MachineBasicBlock::iterator MBBI = MBB->begin();
1738 MachineBasicBlock::iterator E = MBB->end();
1740 for (; MBBI != E; ++MBBI) {
1741 MachineInstr *MI = MBBI;
1742 const TargetInstrDesc &TID = MI->getDesc();
1743 if (TID.isCall() || TID.isTerminator()) {
1744 // Stop at barriers.
// Debug values get no location number so they never affect distances.
1749 if (!MI->isDebugValue())
1750 MI2LocMap[MI] = ++Loc;
1752 if (!isMemoryOp(MI))
// Only unconditional (AL) memory ops are considered.
1754 unsigned PredReg = 0;
1755 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1758 int Opc = MI->getOpcode();
1759 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1760 unsigned Base = MI->getOperand(1).getReg();
1761 int Offset = getMemoryOpOffset(MI);
1763 bool StopHere = false;
// Loads: append to this base's bucket unless the same offset was already
// seen (a duplicate base+offset ends the scan — see StopHere below).
1765 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1766 Base2LdsMap.find(Base);
1767 if (BI != Base2LdsMap.end()) {
1768 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1769 if (Offset == getMemoryOpOffset(BI->second[i])) {
1775 BI->second.push_back(MI);
1777 SmallVector<MachineInstr*, 4> MIs;
1779 Base2LdsMap[Base] = MIs;
1780 LdBases.push_back(Base);
// Stores: same bucketing scheme as loads.
1783 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1784 Base2StsMap.find(Base);
1785 if (BI != Base2StsMap.end()) {
1786 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1787 if (Offset == getMemoryOpOffset(BI->second[i])) {
1793 BI->second.push_back(MI);
1795 SmallVector<MachineInstr*, 4> MIs;
1797 Base2StsMap[Base] = MIs;
1798 StBases.push_back(Base);
1803 // Found a duplicate (a base+offset combination that's seen earlier).
1810 // Re-schedule loads.
1811 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1812 unsigned Base = LdBases[i];
1813 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1815 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1818 // Re-schedule stores.
1819 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1820 unsigned Base = StBases[i];
1821 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1823 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset the per-region buckets before continuing past a barrier.
1827 Base2LdsMap.clear();
1828 Base2StsMap.clear();
1838 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1839 /// optimization pass.
1840 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1842 return new ARMPreAllocLoadStoreOpt();
1843 return new ARMLoadStoreOpt();