//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
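///
/// For example (illustrative only), a sequence such as
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// can be rewritten as a single
///   ldmia r4, {r0, r1, r2}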
//
//===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterClassInfo.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/LivePhysRegs.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

namespace {
  /// Post- register allocation pass that combines load / store instructions
  /// to form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_LOAD_STORE_OPT_NAME;
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
          : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}
static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
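///
/// Illustrative example: after a merged instruction with writeback such as
///   ldmia r0!, {r2, r3}       ; r0 is incremented by 8
/// a later use like
///   ldr r4, [r0, #12]
/// is rewritten to
///   ldr r4, [r0, #4]
/// and a SUB resetting the base is inserted where an offset cannot be folded.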
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}
/// Return the first register of class \p RegClass that is not currently live.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}
/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
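///
/// For example (illustrative), it is fine to query liveness before a later
/// instruction I5 and then before an earlier instruction I2, but not the
/// other way around, because the scan only steps backwards from the last
/// query position.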
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}
static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction, or nullptr if no transformation was done.
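///
/// The addressing mode is chosen from the offset of the first register slot
/// relative to Base (illustrative, ARM mode with 3 registers): Offset 4
/// selects IB, Offset -8 selects DA, Offset -12 selects DB and Offset 0
/// selects IA; any other offset requires materializing a new base register.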
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);
  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;
    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base. Will no longer be writeback in Thumb1.
      NewBase = Regs[NumRegs-1].first;
      Writeback = false;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }
    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.
    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }
  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;
  MachineInstrBuilder MIB;

  if (Writeback) {
    assert(isThumb1 && "expected Writeback only in Thumb1");
    if (Opcode == ARM::tLDMIA) {
      assert(!ContainsReg(Regs, Base) &&
             "Thumb1 can't LDM ! with Base in Regs");
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;
    }

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}
/// Try to merge the instructions in \p Cand into a single load/store multiple
/// or double instruction, updating the surrounding code on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers. After merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }
  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: we are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, a vldm/vstm starting with an odd register number needs more
    // uops than single vldrs, so do not merge in that case.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }
    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
/// generated by the instruction are possibly read as well.
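///
/// For example (illustrative): with Reg == r1 and an AL predicate,
///   add r1, r1, #8   returns  8
///   sub r1, r1, #8   returns -8
/// and a non-matching instruction returns 0.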
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(&MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(&MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}
/// Searches for an increment or decrement of \p Reg before \p MBBI.
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}
/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}
/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  MBB.erase(MergeInstr);

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
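///
/// For example (illustrative):
///   add r0, r0, #4           ldr r1, [r0, #4]!
///   ldr r1, [r0]        =>
/// and
///   ldr r1, [r0]             ldr r1, [r0], #4
///   add r0, r0, #4      =>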
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
       .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
       .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}
/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}
/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
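///
/// Illustrative example:
///   str r1, [r4]
///   str r2, [r4, #4]      =>  stmia r4, {r1, r2, r3}
///   str r3, [r4, #8]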
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; the op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD
      // instructions; remember them because we may still be able to merge
      // add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }
    // If we are here then the chain is broken; extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from end to begin of the basic
  // block later; this is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}
/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc,
/// lr") into the preceding stack restore so it directly restores the value of
/// LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();

  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}
namespace llvm {
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
}

#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
  "ARM pre- register allocation load / store optimization pass"

namespace {
namespace {
/// Pre- register allocation pass that moves loads / stores from consecutive
/// locations closer together to make it more likely they will be combined
/// later.
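///
/// For example (illustrative; the actual opportunities depend on the incoming
/// schedule):
///   ldr r1, [r0, #4]
///   add r6, r6, r7
///   ldr r2, [r0, #8]
/// is rescheduled so that the two loads end up adjacent, giving the post-RA
/// pass a chance to combine them into an ldm or an ldrd.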
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
    initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  const DataLayout *TD;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  bool runOnMachineFunction(MachineFunction &Fn) override;
  const char *getPassName() const override {
    return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
  }

private:
  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &FirstReg,
                        unsigned &SecondReg, unsigned &BaseReg,
                        int &Offset, unsigned &PredReg,
                        ARMCC::CondCodes &Pred, bool &isT2);
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVectorImpl<MachineInstr *> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
};
char ARMPreAllocLoadStoreOpt::ID = 0;
}
INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
                ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = &Fn.getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineBasicBlock &MFI : Fn)
    Modified |= RescheduleLoadStoreInstrs(&MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
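  // (Illustrative: if the transformation moves six memory ops whose data
  // registers are r1-r6, MemRegs.size() is 6, so the move is still considered
  // profitable as long as no more than 12 other registers become live across
  // the new position.)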
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
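/// (Design note: keeping both instructions' memory operands on the merged
/// instruction preserves the memory dependence information, so later passes,
/// e.g. the post-RA scheduler, can still reason about both accessed locations
/// instead of treating the pair as an unknown access.)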
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }
  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;
  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
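  // (The checks below mirror the target encodings: t2LDRDi8 / t2STRDi8 take
  // an 8-bit immediate scaled by 4, i.e. multiples of 4 in (-1024, 1024),
  // while ARM-mode LDRD / STRD use an addrmode3 8-bit immediate with a
  // separate add/sub bit, hence the sign handling in the else branch.)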
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
              int LOffset = getMemoryOpOffset(LHS);
              int ROffset = getMemoryOpOffset(RHS);
              assert(LHS == RHS || LOffset != ROffset);
              return LOffset > ROffset;
            });
  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
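  // (Illustrative: ldr r1,[r0,#4]; ldr r2,[r0,#8]; ldr r3,[r0,#12] is a
  // contiguous run; if the #8 word were absent, the scan below would stop at
  // the gap instead of trying to move non-adjacent words together.)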
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
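        // (In ARM mode, LDRD / STRD need an even / odd pair: Rt even and
        // Rt2 == Rt + 1; Thumb-2 has no such restriction. The allocation
        // hints set below steer the register allocator towards such a pair.)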
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
          MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
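        // Calls may clobber memory and registers, and terminators end the
        // straight-line region this pass reasons about, so loads / stores
        // must not be rescheduled across them.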
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
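
// Typical usage (illustrative; see the ARM pass pipeline setup in
// ARMTargetMachine.cpp): the pre-RA variant is requested with
// createARMLoadStoreOptimizationPass(/*PreAlloc=*/true) before register
// allocation, and the post-RA variant with
// createARMLoadStoreOptimizationPass(/*PreAlloc=*/false) afterwards.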