1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
// Pass-wide statistics counters, reported when LLVM is run with -stats.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
  // Target hooks cached in runOnMachineFunction for use by the helpers below.
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  virtual bool runOnMachineFunction(MachineFunction &Fn);
  virtual const char *getPassName() const {
    return "ARM load / store optimization pass";
  /// MemOpQueueEntry - One candidate load/store being considered for merging:
  /// its immediate offset, transferred register, kill state, and position
  /// (instruction index) / iterator within the basic block.
  struct MemOpQueueEntry {
    MachineBasicBlock::iterator MBBI;
    MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                    MachineBasicBlock::iterator i)
      : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;
  bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                int Offset, unsigned Base, bool BaseKill, int Opcode,
                ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
  void MergeOpsUpdate(MachineBasicBlock &MBB,
                      ARMCC::CondCodes Pred,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                    int Opcode, unsigned Size,
                    ARMCC::CondCodes Pred, unsigned PredReg,
                    unsigned Scratch, MemOpQueue &MemOps,
                    SmallVector<MachineBasicBlock::iterator, 4> &Merges);
  void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const TargetInstrInfo *TII,
                                MachineBasicBlock::iterator &I);
  bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 MachineBasicBlock::iterator &I);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification (address of ID is used as the unique pass identifier).
char ARMLoadStoreOpt::ID = 0;
/// getLoadStoreMultipleOpcode - Map a single load/store opcode plus an
/// addressing sub-mode (ia/ib/da/db) to the matching load/store-multiple
/// opcode. Note that the Thumb2 and VFP variants only have ia and db forms.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
  // ARM-mode LDM: all four sub-modes exist.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::LDMIA;
  case ARM_AM::da: return ARM::LDMDA;
  case ARM_AM::db: return ARM::LDMDB;
  case ARM_AM::ib: return ARM::LDMIB;
  // ARM-mode STM: all four sub-modes exist.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::STMIA;
  case ARM_AM::da: return ARM::STMDA;
  case ARM_AM::db: return ARM::STMDB;
  case ARM_AM::ib: return ARM::STMIB;
  // Thumb2 LDM: only ia and db.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::t2LDMIA;
  case ARM_AM::db: return ARM::t2LDMDB;
  // Thumb2 STM: only ia and db.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::t2STMIA;
  case ARM_AM::db: return ARM::t2STMDB;
  // VFP single-precision load multiple.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::VLDMSIA;
  case ARM_AM::db: return ARM::VLDMSDB;
  // VFP single-precision store multiple.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::VSTMSIA;
  case ARM_AM::db: return ARM::VSTMSDB;
  // VFP double-precision load multiple.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::VLDMDIA;
  case ARM_AM::db: return ARM::VLDMDDB;
  // VFP double-precision store multiple.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::VSTMDIA;
  case ARM_AM::db: return ARM::VSTMDDB;
/// getLoadStoreMultipleSubMode - Return the addressing sub-mode (ia/ib/da/db)
/// encoded in a load/store-multiple opcode; bad_am_submode if none applies.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  // The *IA* family (increment-after), including base-updating _UPD forms
  // and the return-combining t2LDMIA_RET.
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA_UPD:
  // The *DB* family (decrement-before), base-updating forms.
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
  return ARM_AM::bad_am_submode;
} // end namespace ARM_AM
} // end namespace llvm
/// isT2i32Load - Return true if Opc is a Thumb2 32-bit integer load
/// (12-bit or 8-bit immediate offset form).
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
/// isi32Load - Return true if Opc is an ARM or Thumb2 32-bit integer load.
static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
/// isT2i32Store - Return true if Opc is a Thumb2 32-bit integer store
/// (12-bit or 8-bit immediate offset form).
static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
/// isi32Store - Return true if Opc is an ARM or Thumb2 32-bit integer store.
static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  // Pick the addressing sub-mode from the starting offset; default to
  // increment-after.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
  else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
      // Otherwise use the caller-provided scratch register as the new base.
    // Pick ADD or SUB depending on the sign of Offset; SP-relative forms
    // need distinct Thumb2 opcodes.
    int BaseOpc = !isThumb2
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
      : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
    // The offset must be encodable as a (T2) shifter-operand immediate.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.
    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    BaseKill = true;  // New base is always killed right after its use.
  // Loads define their register operands; stores use (kill) them.
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  // Append one register operand per merged memop, preserving kill state.
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  // UnavailRegs: regs killed by already-merged memops before the insertion
  // point (their kill markers can no longer be updated). KilledRegs/Killer:
  // regs killed by still-unmerged memops before the insertion point.
  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
        KilledRegs.insert(Reg);
  // Memops after the merged range may also sit before the insertion point.
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
  // Build the (reg, isKill) list for MergeOps.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // Register is killed before and it's not easy / possible to update the
      // kill marker on already merged instructions. Abort.
    // If we are inserting the merged operation after an unmerged operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
        memOps[j].isKill = false;
    // Erase the original instruction and mark the queue entry as merged.
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;  // starting offset of the current run
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // UINT_MAX for <undef> operands guarantees the ascending-order check fails.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  // Grow the run while offsets are contiguous and register numbers are legal.
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
    // Can't merge this in. Try merge the earlier ones first.
    MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                   Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
    // Recurse to handle the remainder of the queue starting at i.
    MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
    // Track the latest-positioned memop as the insertion point.
    if (MemOps[i].Position > MemOps[insertAfter].Position)
  // Merge the final run; the base is killed iff the last memop kills it.
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
/// isMatchingDecrement - Return true if MI is "Base = Base - Bytes" with the
/// given predicate, i.e. a SUB-immediate that could be folded into a
/// base-updating load/store. Limit (if non-zero) bounds the encodable offset.
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  // Only plain immediate SUB forms (ARM, Thumb2, and SP-relative) qualify.
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
  // tSUBspi encodes its immediate in words, hence the scale of 4.
  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
/// isMatchingIncrement - Return true if MI is "Base = Base + Bytes" with the
/// given predicate, i.e. an ADD-immediate that could be folded into a
/// base-updating load/store. Limit (if non-zero) bounds the encodable offset.
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  // Only plain immediate ADD forms (ARM, Thumb2, and SP-relative) qualify.
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
  // tADDspi encodes its immediate in words, hence the scale of 4.
  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
/// getLSMultipleTransferSize - Return the total number of bytes transferred
/// by MI. For load/store-multiple instructions the count is derived from the
/// number of register operands beyond the fixed operands: 4 bytes each for
/// integer/single-precision registers, 8 for double-precision.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
/// getUpdatingLSMultipleOpcode - Map a load/store-multiple opcode plus a
/// sub-mode to its base-register-updating (_UPD, writeback) variant.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
  // ARM-mode LDM: all four sub-modes have _UPD forms.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::LDMIA_UPD;
  case ARM_AM::ib: return ARM::LDMIB_UPD;
  case ARM_AM::da: return ARM::LDMDA_UPD;
  case ARM_AM::db: return ARM::LDMDB_UPD;
  // ARM-mode STM: all four sub-modes have _UPD forms.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::STMIA_UPD;
  case ARM_AM::ib: return ARM::STMIB_UPD;
  case ARM_AM::da: return ARM::STMDA_UPD;
  case ARM_AM::db: return ARM::STMDB_UPD;
  // Thumb2 LDM: only ia and db.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::t2LDMIA_UPD;
  case ARM_AM::db: return ARM::t2LDMDB_UPD;
  // Thumb2 STM: only ia and db.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::t2STMIA_UPD;
  case ARM_AM::db: return ARM::t2STMDB_UPD;
  // VFP single-precision load multiple.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::VLDMSIA_UPD;
  case ARM_AM::db: return ARM::VLDMSDB_UPD;
  // VFP double-precision load multiple.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::VLDMDIA_UPD;
  case ARM_AM::db: return ARM::VLDMDDB_UPD;
  // VFP single-precision store multiple.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::VSTMSIA_UPD;
  case ARM_AM::db: return ARM::VSTMSDB_UPD;
  // VFP double-precision store multiple.
  default: llvm_unreachable("Unhandled submode!");
  case ARM_AM::ia: return ARM::VSTMDIA_UPD;
  case ARM_AM::db: return ARM::VSTMDDB_UPD;
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    // Skip over DBG_VALUE instructions when looking for the adjuster.
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    // A decrement before an ia-mode op converts it to db (and similarly for
    // the ib case below).
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    // A trailing increment folds into ia/ib; a trailing decrement into da/db.
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
  // Rebuild as the writeback (_UPD) variant: Base is both defined (WB) and
  // used as the address source.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
/// getPreIndexedLoadStoreOpcode - Map a load/store opcode to its pre-indexed
/// (writeback-before-access) variant. VLDR/VSTR have no pre-indexed forms, so
/// the single-register updating VLDM/VSTM opcodes are used instead, with the
/// ia/db sub-mode chosen by the add/sub direction.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    return ARM::t2LDR_PRE;
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
/// getPostIndexedLoadStoreOpcode - Map a load/store opcode to its post-indexed
/// (writeback-after-access) variant. As with the pre-indexed case, VLDR/VSTR
/// fall back to the single-register updating VLDM/VSTM opcodes.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
    return ARM::LDR_POST;
    return ARM::STR_POST;
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    return ARM::t2LDR_POST;
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  // Classify the addressing mode: AM5 = VFP load/store, AM2 = ARM-mode
  // integer load/store.
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only a zero existing offset can be folded with an inc/dec of the base.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  // Try merging with the previous instruction (yields a pre-indexed form).
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
      isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
  // Try merging with the next instruction (yields a post-indexed form).
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
    NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
  // AM2 packs direction+amount into one immediate; other modes use a signed
  // byte count.
  Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  // VLDM[SD}_UPD, VSTM[SD]_UPD
  // (There are no base-updating versions of VLDR/VSTR instructions, but the
  // updating load/store-multiple instructions can be used with only one
  // register.)
  MachineOperand &MO = MI->getOperand(0);
  BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(isLd ? BaseKill : false))
    .addImm(Pred).addReg(PredReg)
    .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                          getKillRegState(MO.isKill())));
  // LDR_PRE, LDR_POST,
  BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
    .addReg(Base, RegState::Define)
    .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
  // t2LDR_PRE, t2LDR_POST
  BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
    .addReg(Base, RegState::Define)
    .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  MachineOperand &MO = MI->getOperand(0);
  // STR_PRE, STR_POST (ARM mode, with shifter-operand register slot = 0).
  BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
    .addReg(MO.getReg(), getKillRegState(MO.isKill()))
    .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
  // t2STR_PRE, t2STR_POST
  BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
    .addReg(MO.getReg(), getKillRegState(MO.isKill()))
    .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
/// isMemoryOp - Returns true if instruction is a memory operations (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
  int Opcode = MI->getOpcode();
  // Only register-base addressing is handled by this pass.
  return MI->getOperand(1).isReg();
  return MI->getOperand(1).isReg();
  return MI->getOperand(1).isReg();
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  // Find the memop with the smallest Position (earliest in the block).
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
  // Advance the scavenger up to (but not past) that instruction.
  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
/// getMemoryOpOffset - Decode the signed byte offset of a load/store's
/// immediate operand, normalizing across the AM3/AM5/i12-style encodings.
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
  // i12/i8-style opcodes store the plain byte offset directly.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
  // AM5 (VFP) offsets are encoded in words, hence the scale of 4.
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  // Apply the encoded add/sub direction as a sign.
  if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
  if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
/// InsertLDR_STR - Insert a single immediate-offset load (isDef=true) or
/// store (isDef=false) before MBBI, with the given register/base/offset
/// operands and their dead/kill/undef flags. Used by FixInvalidRegPairOp to
/// split an illegal LDRD/STRD register pair into two plain ops.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  // Load form: Reg is a def (possibly dead).
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
    .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
    .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  // Store form: Reg is a use (possibly killed/undef).
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
    .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
    .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
/// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair is not a
/// legal (even, even+1) pair: turn it into an LDM/STM when the registers are
/// ascending and the offset is zero, otherwise split it into two single
/// loads/stores. Returns true if MI was replaced.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // An aligned (even, even+1) pair is already legal; nothing to do.
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // Loads carry dead markers on defs; stores carry kill markers on uses.
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // t2 forms have no separate offset-register operand.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm/stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      // Load: both registers become defs.
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      // Store: both registers remain uses.
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      NewBBI = llvm::prior(MBBI);
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      (BaseKill || OffKill) &&
      (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Emit the odd-register access first so the even (base-overlapping)
        // load comes last and the base stays live long enough.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
1167 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1168 /// ops of the same base and incrementing offset into LDM / STM ops.
1169 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// Running totals: LDM/STM merges formed so far and the number of memory
// ops queued in the chain currently being collected.
1170 unsigned NumMerges = 0;
1171 unsigned NumMemOps = 0;
// State of the current chain: common base register, transfer size,
// predicate, and the ordinal position of the last instruction seen.
1173 unsigned CurrBase = 0;
1175 unsigned CurrSize = 0;
1176 ARMCC::CondCodes CurrPred = ARMCC::AL;
1177 unsigned CurrPredReg = 0;
1178 unsigned Position = 0;
1179 SmallVector<MachineBasicBlock::iterator,4> Merges;
// Walk the block with the register scavenger tracking liveness so a
// scratch register can be found later if a merge needs one.
1181 RS->enterBasicBlock(&MBB);
1182 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// First fix up any illegal LDRD/STRD register pairings at this position.
1184 if (FixInvalidRegPairOp(MBB, MBBI))
1187 bool Advance = false;
1188 bool TryMerge = false;
1189 bool Clobber = false;
1191 bool isMemOp = isMemoryOp(MBBI);
// Decode the candidate memory op: opcode, transfer size, data register
// (and its kill state), base register, predicate, and immediate offset.
1193 int Opcode = MBBI->getOpcode();
1194 unsigned Size = getLSMultipleTransferSize(MBBI);
1195 const MachineOperand &MO = MBBI->getOperand(0);
1196 unsigned Reg = MO.getReg();
1197 bool isKill = MO.isDef() ? false : MO.isKill();
1198 unsigned Base = MBBI->getOperand(1).getReg();
1199 unsigned PredReg = 0;
1200 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1201 int Offset = getMemoryOpOffset(MBBI);
// Watch for a load that redefines its own base register, e.g.
1204 // r5 := ldr [r5, #4]
1205 // r6 := ldr [r5, #8]
1207 // The second ldr has effectively broken the chain even though it
1208 // looks like the later ldr(s) use the same base register. Try to
1209 // merge the ldr's so far, including this one. But don't try to
1210 // combine the following ldr(s).
1211 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1212 if (CurrBase == 0 && !Clobber) {
1213 // Start of a new chain.
1218 CurrPredReg = PredReg;
1219 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
// Same opcode, base and predicate: this op may extend the current chain.
1228 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1229 // No need to match PredReg.
1230 // Continue adding to the queue.
1231 if (Offset > MemOps.back().Offset) {
1232 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise insert into the queue keeping it sorted by offset,
// rejecting a duplicate offset (can't merge two ops at one address).
1237 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1239 if (Offset < I->Offset) {
1240 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1245 } else if (Offset == I->Offset) {
1246 // Collision! This can't be merged!
// Debug values are transparent: they neither extend nor break a chain.
1255 if (MBBI->isDebugValue()) {
1258 // Reached the end of the block; try merging the memory instructions.
1260 } else if (Advance) {
1264 // Reached the end of the block; try merging the memory instructions.
1270 if (NumMemOps > 1) {
1271 // Try to find a free register to use as a new base in case it's needed.
1272 // First advance to the instruction just before the start of the chain.
1273 AdvanceRS(MBB, MemOps);
1274 // Find a scratch register.
1275 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1276 // Process the load / store instructions.
1277 RS->forward(prior(MBBI));
// Merge the queued ops into as few LDM/STM instructions as possible.
1281 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1282 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1284 // Try folding preceding/trailing base inc/dec into the generated
// LDM/STM instructions (forming writeback variants).
1286 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1287 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1289 NumMerges += Merges.size();
1291 // Try folding preceding/trailing base inc/dec into those load/store
1292 // that were not merged to form LDM/STM ops.
1293 for (unsigned i = 0; i != NumMemOps; ++i)
1294 if (!MemOps[i].Merged)
1295 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1298 // RS may be pointing to an instruction that's deleted.
1299 RS->skipTo(prior(MBBI));
1300 } else if (NumMemOps == 1) {
1301 // Try folding preceding/trailing base inc/dec into the single
// load/store of the chain.
1303 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1305 RS->forward(prior(MBBI));
// Reset chain state before scanning for the next chain.
1312 CurrPred = ARMCC::AL;
1319 // If iterator hasn't been advanced and this is not a memory op, skip it.
1320 // It can't start a new chain anyway.
1321 if (!Advance && !isMemOp && MBBI != E) {
1327 return NumMerges > 0;
// Comparator used to sort load/store instructions by their memory offset,
// largest offset first (descending order).
1331 struct OffsetCompare {
1332 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1333 int LOffset = getMemoryOpOffset(LHS);
1334 int ROffset = getMemoryOpOffset(RHS);
// Two distinct candidates must never share an offset; duplicates are
// filtered out before the sort.
1335 assert(LHS == RHS || LOffset != ROffset);
1336 return LOffset > ROffset;
1341 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
1342 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1343 /// directly restores the value of LR into pc.
1344 /// ldmfd sp!, {..., lr}
1347 /// ldmfd sp!, {..., lr}
1350 /// ldmfd sp!, {..., pc}
1351 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1352 if (MBB.empty()) return false;
// The block's last instruction must be one of the recognized return forms.
1354 MachineBasicBlock::iterator MBBI = prior(MBB.end());
1355 if (MBBI != MBB.begin() &&
1356 (MBBI->getOpcode() == ARM::BX_RET ||
1357 MBBI->getOpcode() == ARM::tBX_RET ||
1358 MBBI->getOpcode() == ARM::MOVPCLR)) {
// The instruction before the return must be a writeback multiple-load.
1359 MachineInstr *PrevMI = prior(MBBI);
1360 unsigned Opcode = PrevMI->getOpcode();
1361 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1362 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1363 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The last register in the LDM's register list must be LR, since that
// is the register the fold retargets to PC.
1364 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1365 if (MO.getReg() != ARM::LR)
// Rewrite the LDM to the *_RET form, which pops directly into pc.
1367 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1368 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1369 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1370 PrevMI->setDesc(TII->get(NewOpc));
// Carry over the return's implicit operands (e.g. live-out registers).
1372 PrevMI->copyImplicitOps(&*MBBI);
// Pass entry point (post-RA variant): cache target/function info, then run
// the LDM/STM merger and the return-into-LDM fold on every basic block.
1380 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1381 const TargetMachine &TM = Fn.getTarget();
1382 AFI = Fn.getInfo<ARMFunctionInfo>();
1383 TII = TM.getInstrInfo();
1384 TRI = TM.getRegisterInfo();
// NOTE(review): RS is heap-allocated here; the matching delete is not
// visible in this excerpt — presumably released at the end of this
// function. Confirm against the full file.
1385 RS = new RegScavenger();
1386 isThumb2 = AFI->isThumb2Function();
1388 bool Modified = false;
1389 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1391 MachineBasicBlock &MBB = *MFI;
1392 Modified |= LoadStoreMultipleOpti(MBB);
1393 Modified |= MergeReturnIntoLDM(MBB);
1401 /// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
1402 /// loads / stores from consecutive locations close together to make it more
1403 /// likely they will be combined later.
1406 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1408 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target/function objects, initialized in runOnMachineFunction.
1410 const TargetData *TD;
1411 const TargetInstrInfo *TII;
1412 const TargetRegisterInfo *TRI;
1413 const ARMSubtarget *STI;
1414 MachineRegisterInfo *MRI;
1415 MachineFunction *MF;
1417 virtual bool runOnMachineFunction(MachineFunction &Fn);
1419 virtual const char *getPassName() const {
1420 return "ARM pre- register allocation load / store optimization pass";
// Helpers: decide whether two adjacent loads/stores can become a single
// LDRD/STRD, and reschedule same-base memory ops so they end up adjacent.
1424 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1425 unsigned &NewOpc, unsigned &EvenReg,
1426 unsigned &OddReg, unsigned &BaseReg,
1428 unsigned &PredReg, ARMCC::CondCodes &Pred,
1430 bool RescheduleOps(MachineBasicBlock *MBB,
1431 SmallVector<MachineInstr*, 4> &Ops,
1432 unsigned Base, bool isLd,
1433 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1434 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification.
1436 char ARMPreAllocLoadStoreOpt::ID = 0;
// Pass entry point (pre-RA variant): cache target info, then reschedule
// loads/stores in every basic block of the function.
1439 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1440 TD = Fn.getTarget().getTargetData();
1441 TII = Fn.getTarget().getInstrInfo();
1442 TRI = Fn.getTarget().getRegisterInfo();
1443 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1444 MRI = &Fn.getRegInfo();
1447 bool Modified = false;
1448 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1450 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Check whether the loads (or stores) in MemOps,
// all off register Base, can be moved together past the instructions in
// [I, E) without changing behavior, and whether doing so keeps the estimated
// register-pressure increase acceptable.
1455 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1456 MachineBasicBlock::iterator I,
1457 MachineBasicBlock::iterator E,
1458 SmallPtrSet<MachineInstr*, 4> &MemOps,
1459 SmallSet<unsigned, 4> &MemRegs,
1460 const TargetRegisterInfo *TRI) {
1461 // Are there stores / loads / calls between them?
1462 // FIXME: This is overly conservative. We should make use of alias information
// Registers whose live ranges would be stretched across the moved region.
1464 SmallSet<unsigned, 4> AddedRegPressure;
// Skip debug values and the memory ops being moved themselves.
1466 if (I->isDebugValue() || MemOps.count(&*I))
// Calls, terminators and side-effecting instructions act as barriers.
1468 const TargetInstrDesc &TID = I->getDesc();
1469 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
// Moving loads across an intervening store is unsafe (possible aliasing).
1471 if (isLd && TID.mayStore())
1476 // It's not safe to move the first 'str' down.
// e.g.
1479 // str r4, [r0, #+4]
// Scan the operands: a def overlapping the base register makes the move
// unsafe (handled on the elided line after the check); other register
// uses add to the estimated pressure.
1483 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1484 MachineOperand &MO = I->getOperand(j);
1487 unsigned Reg = MO.getReg();
1488 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1490 if (Reg != Base && !MemRegs.count(Reg))
1491 AddedRegPressure.insert(Reg);
1495 // Estimate register pressure increase due to the transformation.
1496 if (MemRegs.size() <= 4)
1497 // Ok if we are moving small number of instructions.
// Otherwise allow at most 2x the number of data registers being moved.
1499 return AddedRegPressure.size() <= MemRegs.size() * 2;
// CanFormLdStDWord - Determine whether Op0/Op1 (two loads or two stores at
// consecutive offsets off the same base) can be combined into a single
// LDRD/STRD. On success the out-parameters describe the new instruction.
1503 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1505 unsigned &NewOpc, unsigned &EvenReg,
1506 unsigned &OddReg, unsigned &BaseReg,
1507 int &Offset, unsigned &PredReg,
1508 ARMCC::CondCodes &Pred,
1510 // Make sure we're allowed to generate LDRD/STRD.
1511 if (!STI->hasV5TEOps())
1514 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Select the paired opcode corresponding to Op0's opcode (ARM or Thumb2).
1516 unsigned Opcode = Op0->getOpcode();
1517 if (Opcode == ARM::LDRi12)
1519 else if (Opcode == ARM::STRi12)
1521 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1522 NewOpc = ARM::t2LDRDi8;
1525 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1526 NewOpc = ARM::t2STRDi8;
1532 // Make sure the base address satisfies i64 ld / st alignment requirement.
// Require a single, non-volatile memory operand with a known value.
1533 if (!Op0->hasOneMemOperand() ||
1534 !(*Op0->memoperands_begin())->getValue() ||
1535 (*Op0->memoperands_begin())->isVolatile())
// Required alignment: the ABI's i64 alignment on v6+, 8 bytes before v6.
1538 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1539 const Function *Func = MF->getFunction();
1540 unsigned ReqAlign = STI->hasV6Ops()
1541 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1542 : 8; // Pre-v6 need 8-byte align
1543 if (Align < ReqAlign)
1546 // Then make sure the immediate offset fits.
1547 int OffImm = getMemoryOpOffset(Op0);
1551 // Can't fall back to t2LDRi8 / t2STRi8.
// The immediate must fit in an 8-bit scaled field and be scale-aligned.
1554 int Limit = (1 << 8) * Scale;
1555 if (OffImm >= Limit || (OffImm & (Scale-1)))
// ARM addrmode3 encodes the add/sub direction separately from magnitude.
1560 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1562 AddSub = ARM_AM::sub;
1565 int Limit = (1 << 8) * Scale;
1566 if (OffImm >= Limit || (OffImm & (Scale-1)))
1568 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// The two data registers must be distinct for a valid pair.
1570 EvenReg = Op0->getOperand(0).getReg();
1571 OddReg = Op1->getOperand(0).getReg();
1572 if (EvenReg == OddReg)
1574 BaseReg = Op0->getOperand(1).getReg();
1575 Pred = llvm::getInstrPredicate(Op0, PredReg);
1576 dl = Op0->getDebugLoc();
// RescheduleOps - Move same-base loads (or stores) with consecutive offsets
// next to each other so later passes can combine them; when exactly two ops
// qualify, form an LDRD/STRD directly here.
1580 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1581 SmallVector<MachineInstr*, 4> &Ops,
1582 unsigned Base, bool isLd,
1583 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1584 bool RetVal = false;
1586 // Sort by offset (in reverse order).
1587 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1589 // The loads / stores of the same base are in order. Scan them from first to
1590 // last and check for the following:
1591 // 1. Any def of base.
// Process runs of candidates until fewer than two remain.
1593 while (Ops.size() > 1) {
1594 unsigned FirstLoc = ~0U;
1595 unsigned LastLoc = 0;
1596 MachineInstr *FirstOp = 0;
1597 MachineInstr *LastOp = 0;
1599 unsigned LastOpcode = 0;
1600 unsigned LastBytes = 0;
1601 unsigned NumMove = 0;
// Walk Ops from the smallest offset (back of the sorted vector) upward,
// growing the movable run while opcode and stride stay uniform, and
// tracking the earliest/latest block positions of the run.
1602 for (int i = Ops.size() - 1; i >= 0; --i) {
1603 MachineInstr *Op = Ops[i];
1604 unsigned Loc = MI2LocMap[Op];
1605 if (Loc <= FirstLoc) {
1609 if (Loc >= LastLoc) {
// All ops in a run must share the same opcode.
1614 unsigned Opcode = Op->getOpcode();
1615 if (LastOpcode && Opcode != LastOpcode)
// Offsets must increase by exactly the transfer size each step.
1618 int Offset = getMemoryOpOffset(Op);
1619 unsigned Bytes = getLSMultipleTransferSize(Op);
1621 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1624 LastOffset = Offset;
1626 LastOpcode = Opcode;
1627 if (++NumMove == 8) // FIXME: Tune this limit.
// Record the run's instructions and the data registers they move.
1634 SmallPtrSet<MachineInstr*, 4> MemOps;
1635 SmallSet<unsigned, 4> MemRegs;
1636 for (int i = NumMove-1; i >= 0; --i) {
1637 MemOps.insert(Ops[i]);
1638 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1641 // Be conservative, if the instructions are too far apart, don't
1642 // move them. We want to limit the increase of register pressure.
1643 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
// Also require the move to be safe w.r.t. intervening instructions.
1645 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1646 MemOps, MemRegs, TRI);
// If the move was rejected, drop these ops from further consideration.
1648 for (unsigned i = 0; i != NumMove; ++i)
1651 // This is the new location for the loads / stores.
// Loads gather at the run's first position, stores at its last; skip
// over the ops being moved and any debug values.
1652 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1653 while (InsertPos != MBB->end()
1654 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1657 // If we are moving a pair of loads / stores, see if it makes sense
1658 // to try to allocate a pair of registers that can form register pairs.
1659 MachineInstr *Op0 = Ops.back();
1660 MachineInstr *Op1 = Ops[Ops.size()-2];
1661 unsigned EvenReg = 0, OddReg = 0;
1662 unsigned BaseReg = 0, PredReg = 0;
1663 ARMCC::CondCodes Pred = ARMCC::AL;
1665 unsigned NewOpc = 0;
1668 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1669 EvenReg, OddReg, BaseReg,
1670 Offset, PredReg, Pred, isT2)) {
1674 // Form the pair instruction.
// Load case: both destination registers are defs.
1676 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1677 dl, TII->get(NewOpc))
1678 .addReg(EvenReg, RegState::Define)
1679 .addReg(OddReg, RegState::Define)
1681 // FIXME: We're converting from LDRi12 to an insn that still
1682 // uses addrmode2, so we need an explicit offset reg. It should
1683 // always be reg0 since we're transforming LDRi12s.
1686 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store case: the pair instruction reads the two source registers.
1689 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1690 dl, TII->get(NewOpc))
1694 // FIXME: We're converting from LDRi12 to an insn that still
1695 // uses addrmode2, so we need an explicit offset reg. It should
1696 // always be reg0 since we're transforming STRi12s.
1699 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1705 // Add register allocation hints to form register pairs.
1706 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1707 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// Otherwise just splice the loads/stores to the insertion point so they
// become adjacent; count them in the pass statistic.
1709 for (unsigned i = 0; i != NumMove; ++i) {
1710 MachineInstr *Op = Ops.back();
1712 MBB->splice(InsertPos, MBB, Op);
1716 NumLdStMoved += NumMove;
// RescheduleLoadStoreInstrs - Scan the block, bucket loads and stores by
// base register, then invoke RescheduleOps on each bucket.
1726 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1727 bool RetVal = false;
// MI2LocMap assigns each instruction an ordinal position. Base2LdsMap /
// Base2StsMap group loads / stores by base register; LdBases / StBases
// preserve the order in which bases were first encountered.
1729 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1730 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1731 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1732 SmallVector<unsigned, 4> LdBases;
1733 SmallVector<unsigned, 4> StBases;
1736 MachineBasicBlock::iterator MBBI = MBB->begin();
1737 MachineBasicBlock::iterator E = MBB->end();
1739 for (; MBBI != E; ++MBBI) {
1740 MachineInstr *MI = MBBI;
1741 const TargetInstrDesc &TID = MI->getDesc();
1742 if (TID.isCall() || TID.isTerminator()) {
1743 // Stop at barriers.
// Number each non-debug instruction for later distance checks.
1748 if (!MI->isDebugValue())
1749 MI2LocMap[MI] = ++Loc;
// Only unpredicated memory ops participate in rescheduling.
1751 if (!isMemoryOp(MI))
1753 unsigned PredReg = 0;
1754 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1757 int Opc = MI->getOpcode();
1758 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1759 unsigned Base = MI->getOperand(1).getReg();
1760 int Offset = getMemoryOpOffset(MI);
1762 bool StopHere = false;
// Loads: append to this base's bucket, stopping at the first repeated
// base+offset combination (a duplicate address).
1764 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1765 Base2LdsMap.find(Base);
1766 if (BI != Base2LdsMap.end()) {
1767 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1768 if (Offset == getMemoryOpOffset(BI->second[i])) {
1774 BI->second.push_back(MI);
// First load seen for this base: start a new bucket.
1776 SmallVector<MachineInstr*, 4> MIs;
1778 Base2LdsMap[Base] = MIs;
1779 LdBases.push_back(Base);
// Stores: same bookkeeping as loads, in the store-side maps.
1782 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1783 Base2StsMap.find(Base);
1784 if (BI != Base2StsMap.end()) {
1785 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1786 if (Offset == getMemoryOpOffset(BI->second[i])) {
1792 BI->second.push_back(MI);
1794 SmallVector<MachineInstr*, 4> MIs;
1796 Base2StsMap[Base] = MIs;
1797 StBases.push_back(Base);
1802 // Found a duplicate (a base+offset combination that's seen earlier).
1809 // Re-schedule loads.
1810 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1811 unsigned Base = LdBases[i];
1812 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1814 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1817 // Re-schedule stores.
1818 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1819 unsigned Base = StBases[i];
1820 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1822 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset per-region state before scanning past the barrier.
1826 Base2LdsMap.clear();
1827 Base2StsMap.clear();
1837 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1838 /// optimization pass.
1839 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
// PreAlloc selects the pre-register-allocation variant.
// NOTE(review): the guarding conditional (original line 1840) is elided in
// this excerpt; these two returns are the branches it selects between.
1841 return new ARMPreAllocLoadStoreOpt();
1842 return new ARMLoadStoreOpt();