1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
// Pass statistics (reported with -stats): counts of multi-register transfer
// instructions formed, and of ldrd/strd instructions rewritten when their
// register pair could not be kept (see FixInvalidRegPairOp).
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMLoadStoreOpt - Post-register-allocation pass that combines adjacent
55 /// load / store instructions to form ldm / stm instructions.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target hooks used when building the merged instructions.
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
// Entry point: run the optimization over every block of Fn.
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
// MemOpQueueEntry - One merge candidate: the memory op's immediate offset,
// transfer register, kill flag, position within the basic block, and an
// iterator to the instruction itself.  Merged starts false and records
// whether the op has been folded into a multiple instruction.
75 struct MemOpQueueEntry {
80 MachineBasicBlock::iterator MBBI;
82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
83 MachineBasicBlock::iterator i)
84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
87 typedef MemOpQueue::iterator MemOpQueueIter;
// Build and insert a single LDM/STM covering Regs (see definition below).
89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
90 int Offset, unsigned Base, bool BaseKill, int Opcode,
91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
// Call MergeOps and keep the bookkeeping (kill flags, Merged marks,
// Merges list) consistent afterwards.
93 void MergeOpsUpdate(MachineBasicBlock &MBB,
102 ARMCC::CondCodes Pred,
106 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
// Greedily merge a run of queued loads/stores starting at MemOps[SIndex].
107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
108 int Opcode, unsigned Size,
109 ARMCC::CondCodes Pred, unsigned PredReg,
110 unsigned Scratch, MemOpQueue &MemOps,
111 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
// Advance the register scavenger to just before the earliest queued op.
113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
// Rewrite an LDRD/STRD whose register pair can't be encoded.
114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator &MBBI);
// Fold a neighbouring base-register inc/dec into a single LDR/STR.
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
117 MachineBasicBlock::iterator MBBI,
118 const TargetInstrInfo *TII,
120 MachineBasicBlock::iterator &I);
// Fold a neighbouring base-register inc/dec into an LDM/STM/VLDM/VSTM.
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
124 MachineBasicBlock::iterator &I);
// Per-basic-block drivers.
125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification, replacement for typeid.
128 char ARMLoadStoreOpt::ID = 0;
/// getLoadStoreMultipleOpcode - Map a single-register load/store opcode plus
/// an addressing submode (ia/ib/da/db) to the corresponding load/store
/// multiple opcode.  Note from the cases below that the Thumb2 and VFP
/// variants only provide the ia and db submodes.
131 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
133 default: llvm_unreachable("Unhandled opcode!");
// ARM LDM: all four submodes available.
137 default: llvm_unreachable("Unhandled submode!");
138 case ARM_AM::ia: return ARM::LDMIA;
139 case ARM_AM::da: return ARM::LDMDA;
140 case ARM_AM::db: return ARM::LDMDB;
141 case ARM_AM::ib: return ARM::LDMIB;
// ARM STM: all four submodes available.
147 default: llvm_unreachable("Unhandled submode!");
148 case ARM_AM::ia: return ARM::STMIA;
149 case ARM_AM::da: return ARM::STMDA;
150 case ARM_AM::db: return ARM::STMDB;
151 case ARM_AM::ib: return ARM::STMIB;
// Thumb2 LDM (ia/db only).
158 default: llvm_unreachable("Unhandled submode!");
159 case ARM_AM::ia: return ARM::t2LDMIA;
160 case ARM_AM::db: return ARM::t2LDMDB;
// Thumb2 STM (ia/db only).
167 default: llvm_unreachable("Unhandled submode!");
168 case ARM_AM::ia: return ARM::t2STMIA;
169 case ARM_AM::db: return ARM::t2STMDB;
// VFP single-precision load multiple.
175 default: llvm_unreachable("Unhandled submode!");
176 case ARM_AM::ia: return ARM::VLDMSIA;
177 case ARM_AM::db: return ARM::VLDMSDB;
// VFP single-precision store multiple.
183 default: llvm_unreachable("Unhandled submode!");
184 case ARM_AM::ia: return ARM::VSTMSIA;
185 case ARM_AM::db: return ARM::VSTMSDB;
// VFP double-precision load multiple.
191 default: llvm_unreachable("Unhandled submode!");
192 case ARM_AM::ia: return ARM::VLDMDIA;
193 case ARM_AM::db: return ARM::VLDMDDB;
// VFP double-precision store multiple.
199 default: llvm_unreachable("Unhandled submode!");
200 case ARM_AM::ia: return ARM::VSTMDIA;
201 case ARM_AM::db: return ARM::VSTMDDB;
/// getLoadStoreMultipleSubMode - Inverse of getLoadStoreMultipleOpcode:
/// recover the addressing submode (ia/ib/da/db) encoded in a load/store
/// multiple opcode.  Falls through to bad_am_submode for opcodes with no
/// submode.
209 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
211 default: llvm_unreachable("Unhandled opcode!");
241 return ARM_AM::bad_am_submode;
// isT2i32Load - True if Opc is a Thumb2 32-bit integer load with an
// immediate offset (12-bit positive or 8-bit negative form).
244 static bool isT2i32Load(unsigned Opc) {
245 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// isi32Load - True for any 32-bit integer load this pass handles:
// ARM LDRi12 or either Thumb2 immediate form.
248 static bool isi32Load(unsigned Opc) {
249 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
// isT2i32Store - True if Opc is a Thumb2 32-bit integer store with an
// immediate offset (12-bit positive or 8-bit negative form).
252 static bool isT2i32Store(unsigned Opc) {
253 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// isi32Store - True for any 32-bit integer store this pass handles:
// ARM STRi12 or either Thumb2 immediate form.
256 static bool isi32Store(unsigned Opc) {
257 return Opc == ARM::STRi12 || isT2i32Store(Opc);
260 /// MergeOps - Create and insert a LDM or STM with Base as base register and
261 /// registers in Regs as the register operands that would be loaded / stored.
262 /// It returns true if the transformation is done.
/// Offset is the starting immediate offset of the run; Opcode is the
/// single-register load/store opcode being replaced; Scratch is a register
/// that may be used to materialize a new base when Offset is non-zero.
/// Regs holds (register, kill-flag) pairs in ascending register order.
264 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
265 MachineBasicBlock::iterator MBBI,
266 int Offset, unsigned Base, bool BaseKill,
267 int Opcode, ARMCC::CondCodes Pred,
268 unsigned PredReg, unsigned Scratch, DebugLoc dl,
269 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
270 // Only a single register to load / store. Don't bother.
271 unsigned NumRegs = Regs.size();
// Pick the addressing submode implied by the starting offset.
275 ARM_AM::AMSubMode Mode = ARM_AM::ia;
276 // VFP and Thumb2 do not support IB or DA modes.
277 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
278 bool haveIBAndDA = isNotVFP && !isThumb2;
279 if (Offset == 4 && haveIBAndDA)
281 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
283 else if (Offset == -4 * (int)NumRegs && isNotVFP)
284 // VLDM/VSTM do not support DB mode without also updating the base reg.
286 else if (Offset != 0) {
287 // If starting offset isn't zero, insert a MI to materialize a new base.
288 // But only do so if it is cost effective, i.e. merging more than two
294 if (isi32Load(Opcode))
295 // If it is a load, then just use one of the destination registers
296 // as the new base.
297 NewBase = Regs[NumRegs-1].first;
299 // Otherwise use the scratch register as the new base.
// Pick an ADD or SUB of the right flavor (ARM vs Thumb2, SP-relative or
// not) to compute NewBase = Base +/- Offset.
304 int BaseOpc = !isThumb2
306 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
310 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
// The offset must be encodable as a shifter-operand immediate.
313 int ImmedOffset = isThumb2
314 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
315 if (ImmedOffset == -1)
316 // FIXME: Try t2ADDri12 or t2SUBri12?
317 return false; // Probably not worth it then.
319 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
320 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
321 .addImm(Pred).addReg(PredReg).addReg(0);
323 BaseKill = true; // New base is always killed right after its use.
// Loads define their transfer registers; stores read (and may kill) them.
326 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
327 Opcode == ARM::VLDRD);
328 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
329 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
330 .addReg(Base, getKillRegState(BaseKill))
331 .addImm(Pred).addReg(PredReg);
332 for (unsigned i = 0; i != NumRegs; ++i)
333 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
334 | getKillRegState(Regs[i].second));
339 // MergeOpsUpdate - Call MergeOps and, on success, update MemOps and Merges
// accordingly: erase the merged instructions, mark their queue entries as
// Merged, and fix up kill flags on unmerged ops the new instruction now
// covers.
341 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
343 unsigned memOpsBegin, unsigned memOpsEnd,
344 unsigned insertAfter, int Offset,
345 unsigned Base, bool BaseKill,
347 ARMCC::CondCodes Pred, unsigned PredReg,
350 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
351 // First calculate which of the registers should be killed by the merged
// instruction.  insertPos is the block position of the instruction the
// merged op will be inserted after.
353 const unsigned insertPos = memOps[insertAfter].Position;
// UnavailRegs: regs killed by an already-merged op before insertPos (their
// kill marker cannot be moved).  KilledRegs/Killer: regs killed by an
// unmerged op before insertPos, and which op kills them.
355 SmallSet<unsigned, 4> UnavailRegs;
356 SmallSet<unsigned, 4> KilledRegs;
357 DenseMap<unsigned, unsigned> Killer;
358 for (unsigned i = 0; i < memOpsBegin; ++i) {
359 if (memOps[i].Position < insertPos && memOps[i].isKill) {
360 unsigned Reg = memOps[i].Reg;
361 if (memOps[i].Merged)
362 UnavailRegs.insert(Reg);
364 KilledRegs.insert(Reg);
// Ops after the merged range can also kill before insertPos.
369 for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
370 if (memOps[i].Position < insertPos && memOps[i].isKill) {
371 unsigned Reg = memOps[i].Reg;
372 KilledRegs.insert(Reg);
// Build the (register, kill) list for the merged instruction.
377 SmallVector<std::pair<unsigned, bool>, 8> Regs;
378 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
379 unsigned Reg = memOps[i].Reg;
380 if (UnavailRegs.count(Reg))
381 // Register is killed before and it's not easy / possible to update the
382 // kill marker on already merged instructions. Abort.
385 // If we are inserting the merged operation after an unmerged operation that
386 // uses the same register, make sure to transfer any kill flag.
387 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
388 Regs.push_back(std::make_pair(Reg, isKill));
391 // Try to do the merge.
392 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
394 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
395 Pred, PredReg, Scratch, dl, Regs))
398 // Merge succeeded, update records.
399 Merges.push_back(prior(Loc));
400 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
401 // Remove kill flags from any unmerged memops that come before insertPos.
402 if (Regs[i-memOpsBegin].second) {
403 unsigned Reg = Regs[i-memOpsBegin].first;
404 if (KilledRegs.count(Reg)) {
405 unsigned j = Killer[Reg];
406 memOps[j].MBBI->getOperand(0).setIsKill(false);
407 memOps[j].isKill = false;
// Erase the now-redundant single load/store and mark its entry merged.
410 MBB.erase(memOps[i].MBBI);
411 memOps[i].Merged = true;
415 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
416 /// load / store multiple instructions.
/// Scans MemOps from SIndex looking for the longest run of ops whose offsets
/// step by Size and whose register numbers ascend, then hands the run to
/// MergeOpsUpdate; recurses on the remainder of the queue.
418 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
419 unsigned Base, int Opcode, unsigned Size,
420 ARMCC::CondCodes Pred, unsigned PredReg,
421 unsigned Scratch, MemOpQueue &MemOps,
422 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
423 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
424 int Offset = MemOps[SIndex].Offset;
425 int SOffset = Offset;
426 unsigned insertAfter = SIndex;
427 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
428 DebugLoc dl = Loc->getDebugLoc();
// PRegNum tracks the register number of the previous op in the run;
// undef operands compare as UINT_MAX so they never extend a run.
429 const MachineOperand &PMO = Loc->getOperand(0);
430 unsigned PReg = PMO.getReg();
431 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
432 : getARMRegisterNumbering(PReg);
435 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
436 int NewOffset = MemOps[i].Offset;
437 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
438 unsigned Reg = MO.getReg();
439 unsigned RegNum = MO.isUndef() ? UINT_MAX
440 : getARMRegisterNumbering(Reg);
441 // Register numbers must be in ascending order. For VFP, the registers
442 // must also be consecutive and there is a limit of 16 double-word
443 // registers per instruction.
444 if (Reg != ARM::SP &&
445 NewOffset == Offset + (int)Size &&
446 ((isNotVFP && RegNum > PRegNum)
447 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
452 // Can't merge this in. Try to merge the earlier ones first.
453 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
454 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
455 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
// Track the latest position in the block to insert the merged op after.
460 if (MemOps[i].Position > MemOps[insertAfter].Position)
// Whole remaining queue forms one run; merge it.  BaseKill reflects
// whether the insertion-point instruction kills the base register.
464 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
465 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
466 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
/// isMatchingDecrement - Return true if MI is a SUB of the form
/// "Base = Base - Bytes" with predicate (Pred, PredReg), i.e. a decrement
/// that could be folded into a neighbouring load/store as a base update.
/// Bytes must be non-zero and, when Limit is non-zero, less than Limit.
470 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
471 unsigned Bytes, unsigned Limit,
472 ARMCC::CondCodes Pred, unsigned PredReg){
473 unsigned MyPredReg = 0;
476 if (MI->getOpcode() != ARM::t2SUBri &&
477 MI->getOpcode() != ARM::t2SUBrSPi &&
478 MI->getOpcode() != ARM::t2SUBrSPi12 &&
479 MI->getOpcode() != ARM::tSUBspi &&
480 MI->getOpcode() != ARM::SUBri)
483 // Make sure the offset fits in 8 bits.
484 if (Bytes == 0 || (Limit && Bytes >= Limit))
// tSUBspi encodes its immediate in words, so scale by 4 before comparing.
487 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
488 return (MI->getOperand(0).getReg() == Base &&
489 MI->getOperand(1).getReg() == Base &&
490 (MI->getOperand(2).getImm()*Scale) == Bytes &&
491 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
492 MyPredReg == PredReg);
/// isMatchingIncrement - Return true if MI is an ADD of the form
/// "Base = Base + Bytes" with predicate (Pred, PredReg), i.e. an increment
/// that could be folded into a neighbouring load/store as a base update.
/// Bytes must be non-zero and, when Limit is non-zero, less than Limit.
495 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
496 unsigned Bytes, unsigned Limit,
497 ARMCC::CondCodes Pred, unsigned PredReg){
498 unsigned MyPredReg = 0;
501 if (MI->getOpcode() != ARM::t2ADDri &&
502 MI->getOpcode() != ARM::t2ADDrSPi &&
503 MI->getOpcode() != ARM::t2ADDrSPi12 &&
504 MI->getOpcode() != ARM::tADDspi &&
505 MI->getOpcode() != ARM::ADDri)
508 if (Bytes == 0 || (Limit && Bytes >= Limit))
509 // Make sure the offset fits in 8 bits.
// tADDspi encodes its immediate in words, so scale by 4 before comparing.
512 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
513 return (MI->getOperand(0).getReg() == Base &&
514 MI->getOperand(1).getReg() == Base &&
515 (MI->getOperand(2).getImm()*Scale) == Bytes &&
516 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
517 MyPredReg == PredReg);
/// getLSMultipleTransferSize - Return the number of bytes a load/store
/// (multiple) instruction transfers.  For multiple ops this is derived from
/// the count of register operands beyond the fixed operands: 4 bytes each
/// for GPR/single-precision ops, 8 bytes each for double-precision ops.
520 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
521 switch (MI->getOpcode()) {
551 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
556 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
/// getUpdatingLSMultipleOpcode - Map a non-writeback load/store-multiple
/// opcode plus an addressing submode to the corresponding base-updating
/// (_UPD) variant.  As with getLoadStoreMultipleOpcode, the Thumb2 and VFP
/// variants only provide ia and db submodes.
560 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
561 ARM_AM::AMSubMode Mode) {
563 default: llvm_unreachable("Unhandled opcode!");
// ARM LDM with writeback.
569 default: llvm_unreachable("Unhandled submode!");
570 case ARM_AM::ia: return ARM::LDMIA_UPD;
571 case ARM_AM::ib: return ARM::LDMIB_UPD;
572 case ARM_AM::da: return ARM::LDMDA_UPD;
573 case ARM_AM::db: return ARM::LDMDB_UPD;
// ARM STM with writeback.
581 default: llvm_unreachable("Unhandled submode!");
582 case ARM_AM::ia: return ARM::STMIA_UPD;
583 case ARM_AM::ib: return ARM::STMIB_UPD;
584 case ARM_AM::da: return ARM::STMDA_UPD;
585 case ARM_AM::db: return ARM::STMDB_UPD;
// Thumb2 LDM/STM with writeback (ia/db only).
591 default: llvm_unreachable("Unhandled submode!");
592 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
593 case ARM_AM::db: return ARM::t2LDMDB_UPD;
599 default: llvm_unreachable("Unhandled submode!");
600 case ARM_AM::ia: return ARM::t2STMIA_UPD;
601 case ARM_AM::db: return ARM::t2STMDB_UPD;
// VFP single/double-precision load/store multiple with writeback.
607 default: llvm_unreachable("Unhandled submode!");
608 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
609 case ARM_AM::db: return ARM::VLDMSDB_UPD;
615 default: llvm_unreachable("Unhandled submode!");
616 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
617 case ARM_AM::db: return ARM::VLDMDDB_UPD;
623 default: llvm_unreachable("Unhandled submode!");
624 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
625 case ARM_AM::db: return ARM::VSTMSDB_UPD;
631 default: llvm_unreachable("Unhandled submode!");
632 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
633 case ARM_AM::db: return ARM::VSTMDDB_UPD;
641 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
642 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
644 /// stmia rn, <ra, rb, rc>
645 /// rn := rn + 4 * 3;
647 /// stmia rn!, <ra, rb, rc>
649 /// rn := rn - 4 * 3;
650 /// ldmia rn, <ra, rb, rc>
652 /// ldmdb rn!, <ra, rb, rc>
653 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
654 MachineBasicBlock::iterator MBBI,
656 MachineBasicBlock::iterator &I) {
657 MachineInstr *MI = MBBI;
658 unsigned Base = MI->getOperand(0).getReg();
659 bool BaseKill = MI->getOperand(0).isKill();
660 unsigned Bytes = getLSMultipleTransferSize(MI);
661 unsigned PredReg = 0;
662 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
663 int Opcode = MI->getOpcode();
664 DebugLoc dl = MI->getDebugLoc();
666 // Can't use an updating ld/st if the base register is also a dest
667 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
668 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
669 if (MI->getOperand(i).getReg() == Base)
672 bool DoMerge = false;
673 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
675 // Try merging with the previous instruction.
676 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
677 if (MBBI != BeginMBBI) {
678 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over debug values so they don't block the match.
679 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
681 if (Mode == ARM_AM::ia &&
682 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
685 } else if (Mode == ARM_AM::ib &&
686 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
694 // Try merging with the next instruction.
695 MachineBasicBlock::iterator EndMBBI = MBB.end();
696 if (!DoMerge && MBBI != EndMBBI) {
697 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
698 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
700 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
701 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
703 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
704 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Build the replacement writeback instruction.
719 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
720 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
721 .addReg(Base, getDefRegState(true)) // WB base register
722 .addReg(Base, getKillRegState(BaseKill))
723 .addImm(Pred).addReg(PredReg);
725 // Transfer the rest of operands.
726 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
727 MIB.addOperand(MI->getOperand(OpNum));
729 // Transfer memoperands.
730 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
/// getPreIndexedLoadStoreOpcode - Return the pre-indexed (writeback) form of
/// a single-register load/store opcode.  There are no pre-indexed VLDR/VSTR
/// instructions, so for VFP ops the updating VLDM/VSTM multiple form is
/// returned instead (ia for add, db for sub) and used with a single register.
736 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
737 ARM_AM::AddrOpc Mode) {
744 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
746 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
748 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
750 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
753 return ARM::t2LDR_PRE;
756 return ARM::t2STR_PRE;
757 default: llvm_unreachable("Unhandled opcode!");
/// getPostIndexedLoadStoreOpcode - Return the post-indexed (writeback) form
/// of a single-register load/store opcode.  As with the pre-indexed case,
/// VFP ops have no post-indexed VLDR/VSTR, so the updating VLDM/VSTM
/// multiple form is returned (ia for add, db for sub).
762 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
763 ARM_AM::AddrOpc Mode) {
766 return ARM::LDR_POST;
768 return ARM::STR_POST;
770 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
772 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
774 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
776 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
779 return ARM::t2LDR_POST;
782 return ARM::t2STR_POST;
783 default: llvm_unreachable("Unhandled opcode!");
788 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
789 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
/// A preceding inc/dec becomes a pre-indexed op; a following inc/dec
/// becomes a post-indexed op.  Returns true if a merge was performed.
790 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
791 MachineBasicBlock::iterator MBBI,
792 const TargetInstrInfo *TII,
794 MachineBasicBlock::iterator &I) {
795 MachineInstr *MI = MBBI;
796 unsigned Base = MI->getOperand(1).getReg();
797 bool BaseKill = MI->getOperand(1).isKill();
798 unsigned Bytes = getLSMultipleTransferSize(MI);
799 int Opcode = MI->getOpcode();
800 DebugLoc dl = MI->getDebugLoc();
// Classify the addressing mode: AM5 = VFP load/store, AM2 = ARM integer.
801 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
802 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
803 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset ops can absorb a base update.
804 if (isi32Load(Opcode) || isi32Store(Opcode))
805 if (MI->getOperand(2).getImm() != 0)
807 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
810 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
811 // Can't do the merge if the destination register is the same as the would-be
812 // writeback register.
813 if (isLd && MI->getOperand(0).getReg() == Base)
816 unsigned PredReg = 0;
817 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
818 bool DoMerge = false;
819 ARM_AM::AddrOpc AddSub = ARM_AM::add;
821 // AM2 - 12 bits, thumb2 - 8 bits.
822 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
824 // Try merging with the previous instruction.
825 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
826 if (MBBI != BeginMBBI) {
827 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over debug values so they don't block the match.
828 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
830 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
832 AddSub = ARM_AM::sub;
834 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// Preceding update => pre-indexed form.
838 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
843 // Try merging with the next instruction.
844 MachineBasicBlock::iterator EndMBBI = MBB.end();
845 if (!DoMerge && MBBI != EndMBBI) {
846 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
847 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
850 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
852 AddSub = ARM_AM::sub;
853 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// Trailing update => post-indexed form.
857 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
// Encode the writeback amount in the form the new opcode expects.
871 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
873 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
876 // VLDM[SD}_UPD, VSTM[SD]_UPD
877 // (There are no base-updating versions of VLDR/VSTR instructions, but the
878 // updating load/store-multiple instructions can be used with only one
880 MachineOperand &MO = MI->getOperand(0);
881 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
882 .addReg(Base, getDefRegState(true)) // WB base register
883 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
884 .addImm(Pred).addReg(PredReg)
885 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
886 getKillRegState(MO.isKill())));
889 // LDR_PRE, LDR_POST,
890 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
891 .addReg(Base, RegState::Define)
892 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
894 // t2LDR_PRE, t2LDR_POST
895 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
896 .addReg(Base, RegState::Define)
897 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
899 MachineOperand &MO = MI->getOperand(0);
// STR_PRE, STR_POST (AM2 form carries an extra zero shift-register operand).
902 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
903 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
904 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
906 // t2STR_PRE, t2STR_POST
907 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
908 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
909 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
916 /// isMemoryOp - Returns true if instruction is a memory operation (that this
917 /// pass is capable of operating on).
918 static bool isMemoryOp(const MachineInstr *MI) {
919 // When no memory operands are present, conservatively assume unaligned,
920 // volatile, unfoldable.
921 if (!MI->hasOneMemOperand())
924 const MachineMemOperand *MMO = *MI->memoperands_begin();
926 // Don't touch volatile memory accesses - we may be changing their order.
927 if (MMO->isVolatile())
930 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
932 if (MMO->getAlignment() < 4)
935 // str <undef> could probably be eliminated entirely, but for now we just want
936 // to avoid making a mess of it.
937 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
938 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
939 MI->getOperand(0).isUndef())
942 // Likewise don't mess with references to undefined addresses.
943 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
944 MI->getOperand(1).isUndef())
// Finally, the opcode itself must be one the pass handles and the address
// must be a plain register (not a frame index).
947 int Opcode = MI->getOpcode();
952 return MI->getOperand(1).isReg();
955 return MI->getOperand(1).isReg();
962 return MI->getOperand(1).isReg();
967 /// AdvanceRS - Advance register scavenger to just before the earliest memory
968 /// op that is being merged.
969 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the queue entry with the smallest Position, i.e. the op that
// appears first in the basic block.
970 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
971 unsigned Position = MemOps[0].Position;
972 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
973 if (MemOps[i].Position < Position) {
974 Position = MemOps[i].Position;
975 Loc = MemOps[i].MBBI;
// Advance the scavenger through the instruction preceding that op.
979 if (Loc != MBB.begin())
980 RS->forward(prior(Loc));
/// getMemoryOpOffset - Decode the signed byte offset of a memory op from
/// its addressing-mode immediate operand.  i12-style opcodes store the
/// offset directly; AM3 (LDRD/STRD) and AM5 (VFP) encode a magnitude plus
/// an add/sub flag, with the AM5 magnitude scaled by 4.
983 static int getMemoryOpOffset(const MachineInstr *MI) {
984 int Opcode = MI->getOpcode();
985 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
986 unsigned NumOperands = MI->getDesc().getNumOperands();
987 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// These opcodes keep the plain offset in the immediate field.
989 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
990 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
991 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
992 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
// Decode magnitude, then negate if the addressing mode says subtract.
995 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
996 : ARM_AM::getAM5Offset(OffField) * 4;
998 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1001 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
/// InsertLDR_STR - Insert a single load (isDef) or store of Reg at Offset
/// from BaseReg, with the given predicate and kill/undef flags.  Helper for
/// FixInvalidRegPairOp when an LDRD/STRD has to be split into two plain ops.
1007 static void InsertLDR_STR(MachineBasicBlock &MBB,
1008 MachineBasicBlock::iterator &MBBI,
1009 int Offset, bool isDef,
1010 DebugLoc dl, unsigned NewOpc,
1011 unsigned Reg, bool RegDeadKill, bool RegUndef,
1012 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1013 bool OffKill, bool OffUndef,
1014 ARMCC::CondCodes Pred, unsigned PredReg,
1015 const TargetInstrInfo *TII, bool isT2) {
// Load: Reg is a def (possibly dead).
1017 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1019 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1020 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1021 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store: Reg is a use (possibly killed or undef).
1023 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1025 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1026 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1027 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
/// FixInvalidRegPairOp - If MBBI is an LDRD/STRD (ARM or Thumb2) whose
/// register pair cannot stay as a doubleword op (the first register is not
/// even-numbered with the second one immediately following it), rewrite it:
/// either as a single LDM/STM when the registers ascend and the offset is
/// zero, or as two separate LDR/STR instructions otherwise.  Returns true
/// if the instruction was rewritten; MBBI is updated to the replacement.
1031 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1032 MachineBasicBlock::iterator &MBBI) {
1033 MachineInstr *MI = &*MBBI;
1034 unsigned Opcode = MI->getOpcode();
1035 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1036 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1037 unsigned EvenReg = MI->getOperand(0).getReg();
1038 unsigned OddReg = MI->getOperand(1).getReg();
1039 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1040 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// A valid pair (even register followed by the next odd one) needs no fix.
1041 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
// Gather the operand flags needed to rebuild the replacement op(s).
1044 MachineBasicBlock::iterator NewBBI = MBBI;
1045 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1046 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1047 bool EvenDeadKill = isLd ?
1048 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1049 bool EvenUndef = MI->getOperand(0).isUndef();
1050 bool OddDeadKill = isLd ?
1051 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1052 bool OddUndef = MI->getOperand(1).isUndef();
1053 const MachineOperand &BaseOp = MI->getOperand(2);
1054 unsigned BaseReg = BaseOp.getReg();
1055 bool BaseKill = BaseOp.isKill();
1056 bool BaseUndef = BaseOp.isUndef();
// t2 LDRD/STRD has no separate offset-register operand.
1057 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1058 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1059 int OffImm = getMemoryOpOffset(MI);
1060 unsigned PredReg = 0;
1061 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1063 if (OddRegNum > EvenRegNum && OffImm == 0) {
1064 // Ascending register numbers and no offset. It's safe to change it to a
// ldm/stm of the two registers.
1066 unsigned NewOpc = (isLd)
1067 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1068 : (isT2 ? ARM::t2STMIA : ARM::STMIA)
1070 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1071 .addReg(BaseReg, getKillRegState(BaseKill))
1072 .addImm(Pred).addReg(PredReg)
1073 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1074 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1077 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1078 .addReg(BaseReg, getKillRegState(BaseKill))
1079 .addImm(Pred).addReg(PredReg)
1081 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1083 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1086 NewBBI = llvm::prior(MBBI);
1088 // Split into two instructions.
1089 unsigned NewOpc = (isLd)
1090 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1091 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1092 DebugLoc dl = MBBI->getDebugLoc();
1093 // If this is a load and base register is killed, it may have been
1094 // re-defed by the load, make sure the first load does not clobber it.
1096 (BaseKill || OffKill) &&
1097 (TRI->regsOverlap(EvenReg, BaseReg))) {
1098 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the odd half first so the even half (which overlaps the base)
// is loaded last.
1099 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1100 OddReg, OddDeadKill, false,
1101 BaseReg, false, BaseUndef, false, OffUndef,
1102 Pred, PredReg, TII, isT2);
1103 NewBBI = llvm::prior(MBBI);
1104 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1105 EvenReg, EvenDeadKill, false,
1106 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1107 Pred, PredReg, TII, isT2);
1109 if (OddReg == EvenReg && EvenDeadKill) {
1110 // If the two source operands are the same, the kill marker is
1111 // probably on the first one. e.g.
1112 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1113 EvenDeadKill = false;
// Normal case: even half first, odd half (offset +4) second.
1116 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1117 EvenReg, EvenDeadKill, EvenUndef,
1118 BaseReg, false, BaseUndef, false, OffUndef,
1119 Pred, PredReg, TII, isT2);
1120 NewBBI = llvm::prior(MBBI);
1121 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1122 OddReg, OddDeadKill, OddUndef,
1123 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1124 Pred, PredReg, TII, isT2);
1139 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1140 /// ops of the same base and incrementing offset into LDM / STM ops.
1141 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1142 unsigned NumMerges = 0;
1143 unsigned NumMemOps = 0;
// State for the chain of mergeable memory ops currently being accumulated:
// common base register, per-op transfer size, and shared predicate.
1145 unsigned CurrBase = 0;
1147 unsigned CurrSize = 0;
1148 ARMCC::CondCodes CurrPred = ARMCC::AL;
1149 unsigned CurrPredReg = 0;
1150 unsigned Position = 0;
1151 SmallVector<MachineBasicBlock::iterator,4> Merges;
// The register scavenger tracks liveness so a free scratch register can be
// found if the merged LDM/STM needs a new base register.
1153 RS->enterBasicBlock(&MBB);
1154 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1156 if (FixInvalidRegPairOp(MBB, MBBI))
1159 bool Advance = false;
1160 bool TryMerge = false;
1161 bool Clobber = false;
1163 bool isMemOp = isMemoryOp(MBBI);
1165 int Opcode = MBBI->getOpcode();
1166 unsigned Size = getLSMultipleTransferSize(MBBI);
1167 const MachineOperand &MO = MBBI->getOperand(0);
1168 unsigned Reg = MO.getReg();
// A def (load result) can never carry a kill flag.
1169 bool isKill = MO.isDef() ? false : MO.isKill();
1170 unsigned Base = MBBI->getOperand(1).getReg();
1171 unsigned PredReg = 0;
1172 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1173 int Offset = getMemoryOpOffset(MBBI);
// Watch out for a load that redefines its own base, e.g.:
1176 // r5 := ldr [r5, #4]
1177 // r6 := ldr [r5, #8]
1179 // The second ldr has effectively broken the chain even though it
1180 // looks like the later ldr(s) use the same base register. Try to
1181 // merge the ldr's so far, including this one. But don't try to
1182 // combine the following ldr(s).
1183 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1184 if (CurrBase == 0 && !Clobber) {
1185 // Start of a new chain.
1190 CurrPredReg = PredReg;
1191 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
// Same opcode, base and predicate as the current chain: keep queueing.
// (PredReg need not match -- see comment below.)
1200 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1201 // No need to match PredReg.
1202 // Continue adding to the queue.
1203 if (Offset > MemOps.back().Offset) {
1204 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise insert in offset-sorted position; an equal offset means two
// ops access the same location and the chain cannot be merged.
1209 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1211 if (Offset < I->Offset) {
1212 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1217 } else if (Offset == I->Offset) {
1218 // Collision! This can't be merged!
1227 if (MBBI->isDebugValue()) {
1230 // Reach the end of the block, try merging the memory instructions.
1232 } else if (Advance) {
1236 // Reach the end of the block, try merging the memory instructions.
1242 if (NumMemOps > 1) {
1243 // Try to find a free register to use as a new base in case it's needed.
1244 // First advance to the instruction just before the start of the chain.
1245 AdvanceRS(MBB, MemOps);
1246 // Find a scratch register.
1247 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1248 // Process the load / store instructions.
1249 RS->forward(prior(MBBI));
// Merge the queued ops; resulting LDM/STM instructions land in Merges.
1253 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1254 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1256 // Try folding preceding/trailing base inc/dec into the generated
1258 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1259 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1261 NumMerges += Merges.size();
1263 // Try folding preceding/trailing base inc/dec into those load/store
1264 // that were not merged to form LDM/STM ops.
1265 for (unsigned i = 0; i != NumMemOps; ++i)
1266 if (!MemOps[i].Merged)
1267 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1270 // RS may be pointing to an instruction that's deleted.
1271 RS->skipTo(prior(MBBI));
1272 } else if (NumMemOps == 1) {
1273 // Try folding preceding/trailing base inc/dec into the single
1275 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1277 RS->forward(prior(MBBI));
// Reset the chain state before scanning for the next chain.
1284 CurrPred = ARMCC::AL;
1291 // If iterator hasn't been advanced and this is not a memory op, skip it.
1292 // It can't start a new chain anyway.
1293 if (!Advance && !isMemOp && MBBI != E) {
1299 return NumMerges > 0;
// OffsetCompare - Strict-weak ordering that sorts memory-op instructions by
// DESCENDING memory offset (used by RescheduleOps below).
1303 struct OffsetCompare {
1304 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1305 int LOffset = getMemoryOpOffset(LHS);
1306 int ROffset = getMemoryOpOffset(RHS);
// Distinct instructions in the same candidate set must not share an offset;
// duplicates are filtered out before sorting.
1307 assert(LHS == RHS || LOffset != ROffset);
1308 return LOffset > ROffset;
1313 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
1314 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1315 /// directly restore the value of LR into pc.
1316 /// ldmfd sp!, {..., lr}
1319 /// ldmfd sp!, {..., lr}
1322 /// ldmfd sp!, {..., pc}
1323 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1324 if (MBB.empty()) return false;
1326 MachineBasicBlock::iterator MBBI = prior(MBB.end());
// The block must end in a bare return (ARM/Thumb BX_RET or MOVPCLR) that is
// not the block's only instruction.
1327 if (MBBI != MBB.begin() &&
1328 (MBBI->getOpcode() == ARM::BX_RET ||
1329 MBBI->getOpcode() == ARM::tBX_RET ||
1330 MBBI->getOpcode() == ARM::MOVPCLR)) {
1331 MachineInstr *PrevMI = prior(MBBI);
1332 unsigned Opcode = PrevMI->getOpcode();
// The instruction just before the return must be a writeback multiple-load.
1333 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1334 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1335 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The last register in the LDM's list must be LR for the fold to apply.
1336 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1337 if (MO.getReg() != ARM::LR)
1339 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1340 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1341 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1342 PrevMI->setDesc(TII->get(NewOpc));
// Carry over the return's implicit operands onto the rewritten LDM.
1344 PrevMI->copyImplicitOps(&*MBBI);
// Entry point of the post-RA pass: cache target hooks, then run the two
// peepholes (LDM/STM formation and return-into-LDM folding) on every block.
1352 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1353 const TargetMachine &TM = Fn.getTarget();
1354 AFI = Fn.getInfo<ARMFunctionInfo>();
1355 TII = TM.getInstrInfo();
1356 TRI = TM.getRegisterInfo();
// NOTE(review): RS is heap-allocated here; presumably deleted after the
// loop below -- the deallocation is not visible in this excerpt, confirm.
1357 RS = new RegScavenger();
1358 isThumb2 = AFI->isThumb2Function();
1360 bool Modified = false;
1361 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1363 MachineBasicBlock &MBB = *MFI;
1364 Modified |= LoadStoreMultipleOpti(MBB);
1365 Modified |= MergeReturnIntoLDM(MBB);
1373 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1374 /// load / stores from consecutive locations close to make it more
1375 /// likely they will be combined later.
1378 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1380 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Target hooks, cached once per function in runOnMachineFunction.
1382 const TargetData *TD;
1383 const TargetInstrInfo *TII;
1384 const TargetRegisterInfo *TRI;
1385 const ARMSubtarget *STI;
1386 MachineRegisterInfo *MRI;
1387 MachineFunction *MF;
1389 virtual bool runOnMachineFunction(MachineFunction &Fn);
1391 virtual const char *getPassName() const {
1392 return "ARM pre- register allocation load / store optimization pass";
// CanFormLdStDWord - Check whether the pair Op0/Op1 can be combined into a
// single LDRD/STRD; on success the remaining reference parameters are
// filled in with the new opcode and its operands.
1396 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1397 unsigned &NewOpc, unsigned &EvenReg,
1398 unsigned &OddReg, unsigned &BaseReg,
1400 unsigned &PredReg, ARMCC::CondCodes &Pred,
// RescheduleOps - Move the same-base loads (or stores) in Ops next to each
// other, using MI2LocMap for original program order.
1402 bool RescheduleOps(MachineBasicBlock *MBB,
1403 SmallVector<MachineInstr*, 4> &Ops,
1404 unsigned Base, bool isLd,
1405 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1406 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification; the address of ID is used as a unique pass ID.
1408 char ARMPreAllocLoadStoreOpt::ID = 0;
// Entry point of the pre-RA pass: cache target hooks, then reschedule
// loads/stores in every basic block of the function.
1411 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1412 TD = Fn.getTarget().getTargetData();
1413 TII = Fn.getTarget().getInstrInfo();
1414 TRI = Fn.getTarget().getRegisterInfo();
1415 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1416 MRI = &Fn.getRegInfo();
1419 bool Modified = false;
1420 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1422 Modified |= RescheduleLoadStoreInstrs(MFI);
// IsSafeAndProfitableToMove - Scan the instructions in [I, E) and decide
// whether the memory ops in MemOps (whose transfer registers are MemRegs)
// can be moved together without changing behavior, and without increasing
// register pressure too much. Returns true if the motion is acceptable.
1427 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1428 MachineBasicBlock::iterator I,
1429 MachineBasicBlock::iterator E,
1430 SmallPtrSet<MachineInstr*, 4> &MemOps,
1431 SmallSet<unsigned, 4> &MemRegs,
1432 const TargetRegisterInfo *TRI) {
1433 // Are there stores / loads / calls between them?
1434 // FIXME: This is overly conservative. We should make use of alias information
1436 SmallSet<unsigned, 4> AddedRegPressure;
// Skip debug values and the memory ops that are themselves being moved.
1438 if (I->isDebugValue() || MemOps.count(&*I))
// Calls, terminators and side-effecting instructions are barriers.
1440 const TargetInstrDesc &TID = I->getDesc();
1441 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
// Without alias info, moving loads across an intervening store is unsafe.
1443 if (isLd && TID.mayStore())
1448 // It's not safe to move the first 'str' down.
1451 // str r4, [r0, #+4]
// Inspect each operand: a redefinition of the base blocks the motion, and
// any other register in between adds to the estimated pressure.
1455 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1456 MachineOperand &MO = I->getOperand(j);
1459 unsigned Reg = MO.getReg();
1460 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1462 if (Reg != Base && !MemRegs.count(Reg))
1463 AddedRegPressure.insert(Reg);
1467 // Estimate register pressure increase due to the transformation.
1468 if (MemRegs.size() <= 4)
1469 // Ok if we are moving small number of instructions.
1471 return AddedRegPressure.size() <= MemRegs.size() * 2;
// Decide whether the adjacent i32 load/store pair Op0/Op1 can be rewritten
// as a single doubleword LDRD/STRD. Checks subtarget support, opcode,
// alignment, offset encodability, and register constraints; fills the
// reference parameters with the operands of the would-be pair instruction.
1475 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1477 unsigned &NewOpc, unsigned &EvenReg,
1478 unsigned &OddReg, unsigned &BaseReg,
1479 int &Offset, unsigned &PredReg,
1480 ARMCC::CondCodes &Pred,
1482 // Make sure we're allowed to generate LDRD/STRD.
1483 if (!STI->hasV5TEOps())
1486 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the i32 load/store opcode to its doubleword counterpart.
1488 unsigned Opcode = Op0->getOpcode();
1489 if (Opcode == ARM::LDRi12)
1491 else if (Opcode == ARM::STRi12)
1493 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1494 NewOpc = ARM::t2LDRDi8;
1497 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1498 NewOpc = ARM::t2STRDi8;
1504 // Make sure the base address satisfies i64 ld / st alignment requirement.
1505 if (!Op0->hasOneMemOperand() ||
1506 !(*Op0->memoperands_begin())->getValue() ||
1507 (*Op0->memoperands_begin())->isVolatile())
1510 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1511 const Function *Func = MF->getFunction();
1512 unsigned ReqAlign = STI->hasV6Ops()
1513 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1514 : 8; // Pre-v6 need 8-byte align
1515 if (Align < ReqAlign)
1518 // Then make sure the immediate offset fits.
1519 int OffImm = getMemoryOpOffset(Op0);
1523 // Can't fall back to t2LDRi8 / t2STRi8.
// The (scaled) immediate must fit in 8 bits and be scale-aligned.
1526 int Limit = (1 << 8) * Scale;
1527 if (OffImm >= Limit || (OffImm & (Scale-1)))
// ARM addrmode3 encodes the add/sub direction separately from the magnitude.
1532 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1534 AddSub = ARM_AM::sub;
1537 int Limit = (1 << 8) * Scale;
1538 if (OffImm >= Limit || (OffImm & (Scale-1)))
1540 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// The pair needs two distinct transfer registers.
1542 EvenReg = Op0->getOperand(0).getReg();
1543 OddReg = Op1->getOperand(0).getReg();
1544 if (EvenReg == OddReg)
1546 BaseReg = Op0->getOperand(1).getReg();
1547 Pred = llvm::getInstrPredicate(Op0, PredReg);
1548 dl = Op0->getDebugLoc();
// CopyMMO - Return a copy of MMO with its size replaced by NewSize. Used
// below when two i32 memory ops are merged into one 8-byte LDRD/STRD so the
// new instruction carries a correctly-sized memory operand.
1552 static MachineMemOperand *CopyMMO(const MachineMemOperand *MMO,
1553 unsigned NewSize, MachineFunction *MF) {
1554 return MF->getMachineMemOperand(MachinePointerInfo(MMO->getValue(),
1556 MMO->getFlags(), NewSize,
1557 MMO->getAlignment(), MMO->getTBAAInfo());
// Move the loads (or stores) in Ops -- all using base register Base --
// next to each other so later passes can merge them. Pairs that qualify
// are combined into LDRD/STRD here directly. Returns true if anything
// changed. MI2LocMap gives each instruction's original program position.
1560 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1561 SmallVector<MachineInstr*, 4> &Ops,
1562 unsigned Base, bool isLd,
1563 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1564 bool RetVal = false;
1566 // Sort by offset (in reverse order).
1567 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1569 // The loads / stores of the same base are in order. Scan them from first to
1570 // last and check for the following:
1571 // 1. Any def of base.
1573 while (Ops.size() > 1) {
1574 unsigned FirstLoc = ~0U;
1575 unsigned LastLoc = 0;
1576 MachineInstr *FirstOp = 0;
1577 MachineInstr *LastOp = 0;
1579 unsigned LastOpcode = 0;
1580 unsigned LastBytes = 0;
1581 unsigned NumMove = 0;
// Walk Ops from lowest offset upward (the vector is offset-descending),
// growing a run of same-opcode, consecutive-offset ops to move together.
1582 for (int i = Ops.size() - 1; i >= 0; --i) {
1583 MachineInstr *Op = Ops[i];
1584 unsigned Loc = MI2LocMap[Op];
1585 if (Loc <= FirstLoc) {
1589 if (Loc >= LastLoc) {
1594 unsigned Opcode = Op->getOpcode();
1595 if (LastOpcode && Opcode != LastOpcode)
// Stop the run when offsets stop being contiguous.
1598 int Offset = getMemoryOpOffset(Op);
1599 unsigned Bytes = getLSMultipleTransferSize(Op);
1601 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1604 LastOffset = Offset;
1606 LastOpcode = Opcode;
1607 if (++NumMove == 8) // FIXME: Tune this limit.
1614 SmallPtrSet<MachineInstr*, 4> MemOps;
1615 SmallSet<unsigned, 4> MemRegs;
1616 for (int i = NumMove-1; i >= 0; --i) {
1617 MemOps.insert(Ops[i]);
1618 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1621 // Be conservative, if the instructions are too far apart, don't
1622 // move them. We want to limit the increase of register pressure.
1623 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1625 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1626 MemOps, MemRegs, TRI);
1628 for (unsigned i = 0; i != NumMove; ++i)
1631 // This is the new location for the loads / stores.
1632 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1633 while (InsertPos != MBB->end()
1634 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1637 // If we are moving a pair of loads / stores, see if it makes sense
1638 // to try to allocate a pair of registers that can form register pairs.
1639 MachineInstr *Op0 = Ops.back();
1640 MachineInstr *Op1 = Ops[Ops.size()-2];
1641 unsigned EvenReg = 0, OddReg = 0;
1642 unsigned BaseReg = 0, PredReg = 0;
1643 ARMCC::CondCodes Pred = ARMCC::AL;
1645 unsigned NewOpc = 0;
1648 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1649 EvenReg, OddReg, BaseReg,
1650 Offset, PredReg, Pred, isT2)) {
1654 // Form the pair instruction.
// Load case: both transfer registers are defs.
1656 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1657 dl, TII->get(NewOpc))
1658 .addReg(EvenReg, RegState::Define)
1659 .addReg(OddReg, RegState::Define)
1661 // FIXME: We're converting from LDRi12 to an insn that still
1662 // uses addrmode2, so we need an explicit offset reg. It should
1663 // always be reg0 since we're transforming LDRi12s.
1666 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1668 // Copy memoperands but change size to 8.
1669 for (MachineInstr::mmo_iterator mmo = Op0->memoperands_begin();
1670 mmo != Op0->memoperands_end(); ++mmo)
1671 MIB.addMemOperand(CopyMMO(*mmo, 8, MF));
// Store case: same shape, but the transfer registers are uses.
1674 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1675 dl, TII->get(NewOpc))
1679 // FIXME: We're converting from LDRi12 to an insn that still
1680 // uses addrmode2, so we need an explicit offset reg. It should
1681 // always be reg0 since we're transforming STRi12s.
1684 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1685 // Copy memoperands but change size to 8.
1686 for (MachineInstr::mmo_iterator mmo = Op0->memoperands_begin();
1687 mmo != Op0->memoperands_end(); ++mmo)
1688 MIB.addMemOperand(CopyMMO(*mmo, 8, MF));
1694 // Add register allocation hints to form register pairs.
1695 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1696 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// Not a pairable case: just splice the ops next to the insertion point.
1698 for (unsigned i = 0; i != NumMove; ++i) {
1699 MachineInstr *Op = Ops.back();
1701 MBB->splice(InsertPos, MBB, Op);
1705 NumLdStMoved += NumMove;
// Scan the block, bucketing unpredicated i32/FP loads and stores by base
// register, then hand each bucket to RescheduleOps. A repeated base+offset
// combination terminates the current region so aliasing pairs are never
// rescheduled across each other.
1715 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1716 bool RetVal = false;
// MI2LocMap records original program order; the Base2*Map/Bases structures
// group candidate loads and stores by their base register.
1718 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1719 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1720 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1721 SmallVector<unsigned, 4> LdBases;
1722 SmallVector<unsigned, 4> StBases;
1725 MachineBasicBlock::iterator MBBI = MBB->begin();
1726 MachineBasicBlock::iterator E = MBB->end();
1728 for (; MBBI != E; ++MBBI) {
1729 MachineInstr *MI = MBBI;
1730 const TargetInstrDesc &TID = MI->getDesc();
1731 if (TID.isCall() || TID.isTerminator()) {
1732 // Stop at barriers.
// Debug values get no location number so they don't perturb distances.
1737 if (!MI->isDebugValue())
1738 MI2LocMap[MI] = ++Loc;
1740 if (!isMemoryOp(MI))
// Only unpredicated memory ops are candidates.
1742 unsigned PredReg = 0;
1743 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1746 int Opc = MI->getOpcode();
1747 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1748 unsigned Base = MI->getOperand(1).getReg();
1749 int Offset = getMemoryOpOffset(MI);
1751 bool StopHere = false;
// Loads: append to the existing bucket for this base, or start a new one.
1753 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1754 Base2LdsMap.find(Base);
1755 if (BI != Base2LdsMap.end()) {
1756 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1757 if (Offset == getMemoryOpOffset(BI->second[i])) {
1763 BI->second.push_back(MI);
1765 SmallVector<MachineInstr*, 4> MIs;
1767 Base2LdsMap[Base] = MIs;
1768 LdBases.push_back(Base);
// Stores: same bookkeeping as the load case above.
1771 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1772 Base2StsMap.find(Base);
1773 if (BI != Base2StsMap.end()) {
1774 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1775 if (Offset == getMemoryOpOffset(BI->second[i])) {
1781 BI->second.push_back(MI);
1783 SmallVector<MachineInstr*, 4> MIs;
1785 Base2StsMap[Base] = MIs;
1786 StBases.push_back(Base);
1791 // Found a duplicate (a base+offset combination that's seen earlier).
1798 // Re-schedule loads.
1799 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1800 unsigned Base = LdBases[i];
1801 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1803 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1806 // Re-schedule stores.
1807 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1808 unsigned Base = StBases[i];
1809 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1811 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset the buckets for the next region / block.
1815 Base2LdsMap.clear();
1816 Base2StsMap.clear();
1826 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1827 /// optimization pass.
1828 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
// PreAlloc selects the pre-RA rescheduling pass; otherwise the post-RA
// LDM/STM-forming pass is created (the branch itself is elided in this
// excerpt).
1830 return new ARMPreAllocLoadStoreOpt();
1831 return new ARMLoadStoreOpt();