1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterClassInfo.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/LivePhysRegs.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetRegisterInfo.h"
// Pass boilerplate: DEBUG_TYPE tag for -debug-only filtering, statistics
// counters (bumped by the merging code elsewhere in this file), the pass
// initializer declaration, and the user-visible pass name.
// NOTE(review): each line below begins with a stray original-line-number
// prefix and some lines are elided from this dump.
49 #define DEBUG_TYPE "arm-ldst-opt"
51 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
52 STATISTIC(NumSTMGened , "Number of stm instructions generated");
53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
// Forward declaration of the pass initializer (conventionally declared in
// namespace llvm; the enclosing namespace lines are not visible here).
64 void initializeARMLoadStoreOptPass(PassRegistry &);
67 #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
70 /// Post-register-allocation pass that combines load / store instructions to
71 /// form ldm / stm instructions.
// NOTE(review): gaps in the leading original-line numbers show several
// declarations are missing from this dump ('public:'/'static char ID;',
// MergeCandidate::InsertPos, MemOpQueueEntry::MI, closing braces); the class
// body below is incomplete as shown.
72 struct ARMLoadStoreOpt : public MachineFunctionPass {
74 ARMLoadStoreOpt() : MachineFunctionPass(ID) {
75 initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
// Cached per-function state; presumably initialized in
// runOnMachineFunction (body not visible here) — TODO confirm.
78 const MachineFunction *MF;
79 const TargetInstrInfo *TII;
80 const TargetRegisterInfo *TRI;
81 const ARMSubtarget *STI;
82 const TargetLowering *TL;
84 LivePhysRegs LiveRegs;
85 RegisterClassInfo RegClassInfo;
// Iterator position matching the current LiveRegs state; moved backwards
// by moveLiveRegsBefore().
86 MachineBasicBlock::const_iterator LiveRegPos;
88 bool RegClassInfoValid;
89 bool isThumb1, isThumb2;
91 bool runOnMachineFunction(MachineFunction &Fn) override;
93 const char *getPassName() const override {
94 return ARM_LOAD_STORE_OPT_NAME;
98 /// A set of load/store MachineInstrs with same base register sorted by
100 struct MemOpQueueEntry {
102 int Offset; ///< Load/Store offset.
103 unsigned Position; ///< Position as counted from end of basic block.
104 MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
105 : MI(MI), Offset(Offset), Position(Position) {}
107 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
109 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
110 /// merged into a LDM/STM.
111 struct MergeCandidate {
112 /// List of instructions ordered by load/store offset.
113 SmallVector<MachineInstr*, 4> Instrs;
114 /// Index in Instrs of the instruction being latest in the schedule.
115 unsigned LatestMIIdx;
116 /// Index in Instrs of the instruction being earliest in the schedule.
117 unsigned EarliestMIIdx;
118 /// Index into the basic block where the merged instruction will be
119 /// inserted. (See MemOpQueueEntry.Position)
121 /// Whether the instructions can be merged into a ldm/stm instruction.
122 bool CanMergeToLSMulti;
123 /// Whether the instructions can be merged into a ldrd/strd instruction.
124 bool CanMergeToLSDouble;
// Arena for MergeCandidate objects (see FormCandidates) plus the lists of
// candidates collected per basic block.
126 SpecificBumpPtrAllocator<MergeCandidate> Allocator;
127 SmallVector<const MergeCandidate*,4> Candidates;
128 SmallVector<MachineInstr*,4> MergeBaseCandidates;
// Worker methods; definitions appear later in this file.
130 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
131 MachineBasicBlock::const_iterator Before);
132 unsigned findFreeReg(const TargetRegisterClass &RegClass);
133 void UpdateBaseRegUses(MachineBasicBlock &MBB,
134 MachineBasicBlock::iterator MBBI,
135 DebugLoc DL, unsigned Base, unsigned WordOffset,
136 ARMCC::CondCodes Pred, unsigned PredReg);
137 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
138 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
139 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
140 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
141 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
142 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
143 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
144 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
145 void FormCandidates(const MemOpQueue &MemOps);
146 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
147 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
148 MachineBasicBlock::iterator &MBBI);
149 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
150 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
151 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
152 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
153 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
154 bool CombineMovBx(MachineBasicBlock &MBB);
// Pass ID storage and registration of the pass with the PassRegistry
// (command-line name "arm-load-store-opt"; not a CFG-only or analysis pass).
156 char ARMLoadStoreOpt::ID = 0;
159 INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
/// Return whether \p MI has a live (non-dead) definition of CPSR, i.e. the
/// flags it sets are observed.  Used to decide whether an ADDS/SUBS may be
/// folded into a load/store update.
/// NOTE(review): the loop body's 'return true' and the trailing
/// 'return false' are elided from this dump.
161 static bool definesCPSR(const MachineInstr *MI) {
162 for (const auto &MO : MI->operands()) {
165 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
166 // If the instruction has live CPSR def, then it's not safe to fold it
167 // into load / store.
/// Decode the immediate offset of a single load/store instruction, taking
/// the addressing-mode encoding of the opcode into account:
/// - t2/ARM i12/i8 and t2 LDRD/STRD forms carry a plain immediate,
/// - Thumb1 tLDRi/tSTRi/tLDRspi/tSTRspi immediates are scaled by 4,
/// - ARM LDRD/STRD use AM3, VFP loads/stores use AM5 (word-scaled),
///   both with a separate add/sub direction bit.
/// NOTE(review): the early 'return' statements for the first two groups and
/// the final returns are elided from this dump.
174 static int getMemoryOpOffset(const MachineInstr *MI) {
175 unsigned Opcode = MI->getOpcode();
176 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
177 unsigned NumOperands = MI->getDesc().getNumOperands();
// The offset immediate is the third-from-last operand (before the two
// predicate operands).
178 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
180 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
181 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
182 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
183 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
186 // Thumb1 immediate offsets are scaled by 4
187 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
188 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
191 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
192 : ARM_AM::getAM5Offset(OffField) * 4;
193 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
194 : ARM_AM::getAM5Op(OffField);
// A 'sub' direction bit negates the decoded magnitude.
196 if (Op == ARM_AM::sub)
202 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
203 return MI.getOperand(1);
206 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
207 return MI.getOperand(0);
/// Map a single load/store opcode plus an addressing submode (ia/ib/da/db)
/// to the corresponding load/store-multiple opcode.  Returns 0 where no
/// non-writeback form exists (e.g. VLDM/VSTM in db mode — only the _UPD
/// variants exist).
/// NOTE(review): the outer 'switch (Opcode)'/'case' lines and the closing
/// braces are elided from this dump; only the per-submode case bodies
/// remain visible.
210 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
212 default: llvm_unreachable("Unhandled opcode!");
// ARM-mode LDR -> LDM{IA,DA,DB,IB}.
216 default: llvm_unreachable("Unhandled submode!");
217 case ARM_AM::ia: return ARM::LDMIA;
218 case ARM_AM::da: return ARM::LDMDA;
219 case ARM_AM::db: return ARM::LDMDB;
220 case ARM_AM::ib: return ARM::LDMIB;
// ARM-mode STR -> STM{IA,DA,DB,IB}.
225 default: llvm_unreachable("Unhandled submode!");
226 case ARM_AM::ia: return ARM::STMIA;
227 case ARM_AM::da: return ARM::STMDA;
228 case ARM_AM::db: return ARM::STMDB;
229 case ARM_AM::ib: return ARM::STMIB;
233 // tLDMIA is writeback-only - unless the base register is in the input
237 default: llvm_unreachable("Unhandled submode!");
238 case ARM_AM::ia: return ARM::tLDMIA;
242 // There is no non-writeback tSTMIA either.
245 default: llvm_unreachable("Unhandled submode!");
246 case ARM_AM::ia: return ARM::tSTMIA_UPD;
// Thumb2 supports only ia and db submodes.
252 default: llvm_unreachable("Unhandled submode!");
253 case ARM_AM::ia: return ARM::t2LDMIA;
254 case ARM_AM::db: return ARM::t2LDMDB;
260 default: llvm_unreachable("Unhandled submode!");
261 case ARM_AM::ia: return ARM::t2STMIA;
262 case ARM_AM::db: return ARM::t2STMDB;
// VFP S/D-register forms: only ia has a non-writeback encoding.
267 default: llvm_unreachable("Unhandled submode!");
268 case ARM_AM::ia: return ARM::VLDMSIA;
269 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
274 default: llvm_unreachable("Unhandled submode!");
275 case ARM_AM::ia: return ARM::VSTMSIA;
276 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
281 default: llvm_unreachable("Unhandled submode!");
282 case ARM_AM::ia: return ARM::VLDMDIA;
283 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
288 default: llvm_unreachable("Unhandled submode!");
289 case ARM_AM::ia: return ARM::VSTMDIA;
290 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
/// Inverse of getLoadStoreMultipleOpcode: given a load/store-multiple opcode
/// (with or without writeback), recover its addressing submode.
/// NOTE(review): the 'return ARM_AM::ia;' / 'return ARM_AM::db;' lines and
/// several non-writeback case labels are elided from this dump; the visible
/// groups are the ia-mode opcodes followed by the db-mode opcodes.
295 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
297 default: llvm_unreachable("Unhandled opcode!");
// These opcodes all use the ia (increment-after) submode.
304 case ARM::tLDMIA_UPD:
305 case ARM::tSTMIA_UPD:
306 case ARM::t2LDMIA_RET:
308 case ARM::t2LDMIA_UPD:
310 case ARM::t2STMIA_UPD:
312 case ARM::VLDMSIA_UPD:
314 case ARM::VSTMSIA_UPD:
316 case ARM::VLDMDIA_UPD:
318 case ARM::VSTMDIA_UPD:
// These opcodes all use the db (decrement-before) submode.
332 case ARM::t2LDMDB_UPD:
334 case ARM::t2STMDB_UPD:
335 case ARM::VLDMSDB_UPD:
336 case ARM::VSTMSDB_UPD:
337 case ARM::VLDMDDB_UPD:
338 case ARM::VSTMDDB_UPD:
349 static bool isT1i32Load(unsigned Opc) {
350 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
353 static bool isT2i32Load(unsigned Opc) {
354 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
357 static bool isi32Load(unsigned Opc) {
358 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
361 static bool isT1i32Store(unsigned Opc) {
362 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
365 static bool isT2i32Store(unsigned Opc) {
366 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
369 static bool isi32Store(unsigned Opc) {
370 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
373 static bool isLoadSingle(unsigned Opc) {
374 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
/// Return the scale factor applied to the immediate offset of a Thumb1
/// load/store opcode.
/// NOTE(review): all 'case' lines of the switch are elided from this dump —
/// presumably word ops scale by 4, halfword by 2, byte by 1; confirm against
/// the full source.
377 static unsigned getImmScale(unsigned Opc) {
379 default: llvm_unreachable("Unhandled opcode!");
/// Return the number of bytes transferred by \p MI.
/// For load/store-multiple opcodes the size is the number of register-list
/// operands (total operands minus the fixed operands, +1) times 4 bytes for
/// GPR lists or 8 bytes for D-register lists.
/// NOTE(review): the single-load/store cases and most case labels of this
/// switch are elided from this dump.
394 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
395 switch (MI->getOpcode()) {
422 case ARM::tLDMIA_UPD:
423 case ARM::tSTMIA_UPD:
430 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
433 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
/// After a Thumb1 LDM/STM with base-register writeback has been created,
/// later uses of the base register in the block see a value that is
/// WordOffset*4 bytes too large.  Walk forward from \p MBBI fixing up
/// immediate-offset loads/stores and foldable ADDS/SUBS; when an
/// instruction cannot be fixed up (or the block ends with live successors),
/// insert a SUBS that resets the base register.
/// NOTE(review): several lines (declarations of IsLoad/IsStore/MO/Offset,
/// 'break'/'return' statements, closing braces) are elided from this dump.
437 /// Update future uses of the base register with the offset introduced
438 /// due to writeback. This function only works on Thumb1.
440 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
441 MachineBasicBlock::iterator MBBI,
442 DebugLoc DL, unsigned Base,
444 ARMCC::CondCodes Pred, unsigned PredReg) {
445 assert(isThumb1 && "Can only update base register uses for Thumb1!");
446 // Start updating any instructions with immediate offsets. Insert a SUB before
447 // the first non-updateable instruction (if any).
448 for (; MBBI != MBB.end(); ++MBBI) {
449 bool InsertSub = false;
450 unsigned Opc = MBBI->getOpcode();
452 if (MBBI->readsRegister(Base)) {
// Classify Thumb1 loads/stores with immediate offsets (word/half/byte).
455 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
457 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
459 if (IsLoad || IsStore) {
460 // Loads and stores with immediate offsets can be updated, but only if
461 // the new offset isn't negative.
462 // The MachineOperand containing the offset immediate is the last one
463 // before predicates.
465 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
466 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
467 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
469 // If storing the base register, it needs to be reset first.
470 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
472 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
477 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
478 !definesCPSR(MBBI)) {
479 // SUBS/ADDS using this register, with a dead def of the CPSR.
480 // Merge it with the update; if the merged offset is too large,
481 // insert a new sub instead.
483 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
484 Offset = (Opc == ARM::tSUBi8) ?
485 MO.getImm() + WordOffset * 4 :
486 MO.getImm() - WordOffset * 4 ;
487 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
488 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
491 // The base register has now been reset, so exit early.
498 // Can't update the instruction.
502 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
503 // Since SUBS sets the condition flags, we can't place the base reset
504 // after an instruction that has a live CPSR def.
505 // The base register might also contain an argument for a function call.
510 // An instruction above couldn't be updated, so insert a sub.
511 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
512 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
516 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
517 // Register got killed. Stop updating.
521 // End of block was reached.
522 if (MBB.succ_size() > 0) {
523 // FIXME: Because of a bug, live registers are sometimes missing from
524 // the successor blocks' live-in sets. This means we can't trust that
525 // information and *always* have to reset at the end of a block.
527 if (MBBI != MBB.end()) --MBBI;
529 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
530 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
/// Lazily (re)computes RegClassInfo for the current function, then returns
/// the first allocation-order register of \p RegClass that is not currently
/// live according to the LiveRegs set maintained by moveLiveRegsBefore().
/// NOTE(review): the 'return Reg;' / final 'return 0;' lines are elided from
/// this dump; the doc comment's "\p Regs" refers to the LiveRegs member.
534 /// Return the first register of class \p RegClass that is not in \p Regs.
535 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
536 if (!RegClassInfoValid) {
537 RegClassInfo.runOnMachineFunction(*MF);
538 RegClassInfoValid = true;
541 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
542 if (!LiveRegs.contains(Reg))
/// Maintains the LiveRegs/LiveRegPos cache: seeds liveness from the block's
/// live-outs on first use, then steps backwards instruction by instruction
/// until LiveRegPos reaches \p Before.  Because it only steps backwards,
/// successive queries within one block must be made in reverse order.
/// NOTE(review): the LiveRegs init/decrement lines and closing braces are
/// elided from this dump.
547 /// Compute live registers just before instruction \p Before (in normal schedule
548 /// direction). Computes backwards so multiple queries in the same block must
549 /// come in reverse order.
550 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
551 MachineBasicBlock::const_iterator Before) {
552 // Initialize if we never queried in this block.
553 if (!LiveRegsValid) {
555 LiveRegs.addLiveOuts(&MBB, true);
556 LiveRegPos = MBB.end();
557 LiveRegsValid = true;
559 // Move backward just before the "Before" position.
560 while (LiveRegPos != Before) {
562 LiveRegs.stepBackward(*LiveRegPos);
/// Linear search over a (register, kill-flag) list; returns true if the
/// searched register appears as the first member of any pair.
/// NOTE(review): the second parameter ('unsigned Reg'), the comparison and
/// return statements are elided from this dump.
566 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
568 for (const std::pair<unsigned, bool> &R : Regs)
/// Builds the merged LDM/STM: picks the addressing submode from the starting
/// offset, materializes a new base register (ADD/SUB, possibly via a MOV on
/// Thumb1) when the offset cannot be folded, handles the Thumb1
/// writeback/CPSR constraints, then emits the multiple instruction with the
/// register list.  Returns nullptr when the merge is not profitable or not
/// encodable.
/// NOTE(review): many lines (early-size checks, submode assignments, else
/// branches, closing braces) are elided from this dump; the control flow
/// below is incomplete as shown.
574 /// Create and insert a LDM or STM with Base as base register and registers in
575 /// Regs as the register operands that would be loaded / stored. It returns
576 /// true if the transformation is done.
577 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
578 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
579 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
580 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
581 unsigned NumRegs = Regs.size();
584 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
585 // Compute liveness information for that register to make the decision.
586 bool SafeToClobberCPSR = !isThumb1 ||
587 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
588 MachineBasicBlock::LQR_Dead);
590 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
592 // Exception: If the base register is in the input reglist, Thumb1 LDM is
594 // It's also not possible to merge an STR of the base register in Thumb1.
595 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
596 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
597 if (Opcode == ARM::tLDRi) {
599 } else if (Opcode == ARM::tSTRi) {
604 ARM_AM::AMSubMode Mode = ARM_AM::ia;
605 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
606 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
607 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
// Choose the submode that makes the starting offset implicit.
609 if (Offset == 4 && haveIBAndDA) {
611 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
613 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
614 // VLDM/VSTM do not support DB mode without also updating the base reg.
616 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
617 // Check if this is a supported opcode before inserting instructions to
618 // calculate a new base register.
619 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
621 // If starting offset isn't zero, insert a MI to materialize a new base.
622 // But only do so if it is cost effective, i.e. merging more than two
627 // On Thumb1, it's not worth materializing a new base register without
628 // clobbering the CPSR (i.e. not using ADDS/SUBS).
629 if (!SafeToClobberCPSR)
633 if (isi32Load(Opcode)) {
634 // If it is a load, then just use one of the destination registers
635 // as the new base. Will no longer be writeback in Thumb1.
636 NewBase = Regs[NumRegs-1].first;
639 // Find a free register that we can use as scratch register.
640 moveLiveRegsBefore(MBB, InsertBefore);
641 // The merged instruction does not exist yet but will use several Regs if
643 if (!isLoadSingle(Opcode))
644 for (const std::pair<unsigned, bool> &R : Regs)
645 LiveRegs.addReg(R.first);
647 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
// Select the add/sub opcode used to materialize Base+Offset.
653 isThumb2 ? ARM::t2ADDri :
654 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
655 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
656 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
661 isThumb2 ? ARM::t2SUBri :
662 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
663 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
666 if (!TL->isLegalAddImmediate(Offset))
667 // FIXME: Try add with register operand?
668 return nullptr; // Probably not worth it then.
670 // We can only append a kill flag to the add/sub input if the value is not
671 // used in the register list of the stm as well.
672 bool KillOldBase = BaseKill &&
673 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
676 // Thumb1: depending on immediate size, use either
677 // ADDS NewBase, Base, #imm3
680 // ADDS NewBase, #imm8.
681 if (Base != NewBase &&
682 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
683 // Need to insert a MOV to the new base first.
684 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
686 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
687 if (Pred != ARMCC::AL)
689 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
690 .addReg(Base, getKillRegState(KillOldBase));
692 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
693 .addReg(Base, getKillRegState(KillOldBase))
694 .addImm(Pred).addReg(PredReg);
696 // The following ADDS/SUBS becomes an update.
700 if (BaseOpc == ARM::tADDrSPi) {
701 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
702 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
703 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
704 .addImm(Pred).addReg(PredReg);
707 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
708 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
709 .addImm(Pred).addReg(PredReg);
711 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
712 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
713 .addImm(Pred).addReg(PredReg).addReg(0);
716 BaseKill = true; // New base is always killed straight away.
719 bool isDef = isLoadSingle(Opcode);
721 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
722 // base register writeback.
723 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
727 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
728 // - There is no writeback (LDM of base register),
729 // - the base register is killed by the merged instruction,
730 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
731 // to reset the base register.
732 // Otherwise, don't merge.
733 // It's safe to return here since the code to materialize a new base register
734 // above is also conditional on SafeToClobberCPSR.
735 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
738 MachineInstrBuilder MIB;
741 assert(isThumb1 && "expected Writeback only inThumb1");
742 if (Opcode == ARM::tLDMIA) {
743 assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
744 // Update tLDMIA with writeback if necessary.
745 Opcode = ARM::tLDMIA_UPD;
748 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
750 // Thumb1: we might need to set base writeback when building the MI.
751 MIB.addReg(Base, getDefRegState(true))
752 .addReg(Base, getKillRegState(BaseKill));
754 // The base isn't dead after a merged instruction with writeback.
755 // Insert a sub instruction after the newly formed instruction to reset.
757 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
760 // No writeback, simply build the MachineInstr.
761 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
762 MIB.addReg(Base, getKillRegState(BaseKill));
765 MIB.addImm(Pred).addReg(PredReg);
// Append the register list; destinations for loads, sources for stores.
767 for (const std::pair<unsigned, bool> &R : Regs)
768 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
770 return MIB.getInstr();
/// Create and insert a Thumb2 LDRD/STRD (t2LDRDi8/t2STRDi8) loading/storing
/// exactly the two registers in \p Regs at Base+Offset, predicated by
/// Pred/PredReg.  Returns the newly built instruction.
/// NOTE(review): a couple of lines (e.g. the if/else around the operand
/// adding, use of BaseKill, closing brace) are elided from this dump.
773 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
774 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
775 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
776 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
777 bool IsLoad = isi32Load(Opcode);
778 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
779 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
781 assert(Regs.size() == 2);
782 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
783 TII->get(LoadStoreOpcode));
// Loads define the two registers; stores use them (with kill state).
785 MIB.addReg(Regs[0].first, RegState::Define)
786 .addReg(Regs[1].first, RegState::Define);
788 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
789 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
791 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
792 return MIB.getInstr();
/// Perform the actual merge for one MergeCandidate: collect the register
/// list and any implicit super-register defs, try LDRD/STRD first and
/// LDM/STM second, erase the merged-away instructions, then fix up
/// kill/undef flags and implicit defs in the range between the earliest
/// removed instruction and the new one.  Returns the merged instruction or
/// (presumably) nullptr on failure — the early-return lines are elided.
/// NOTE(review): several lines (continue statements, the store-side else
/// branches, closing braces) are elided from this dump.
795 /// Call MergeOps and update MemOps and merges accordingly on success.
796 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
797 const MachineInstr *First = Cand.Instrs.front();
798 unsigned Opcode = First->getOpcode();
799 bool IsLoad = isLoadSingle(Opcode);
800 SmallVector<std::pair<unsigned, bool>, 8> Regs;
801 SmallVector<unsigned, 4> ImpDefs;
802 DenseSet<unsigned> KilledRegs;
803 DenseSet<unsigned> UsedRegs;
804 // Determine list of registers and list of implicit super-register defs.
805 for (const MachineInstr *MI : Cand.Instrs) {
806 const MachineOperand &MO = getLoadStoreRegOp(*MI);
807 unsigned Reg = MO.getReg();
808 bool IsKill = MO.isKill();
810 KilledRegs.insert(Reg);
811 Regs.push_back(std::make_pair(Reg, IsKill));
812 UsedRegs.insert(Reg);
815 // Collect any implicit defs of super-registers, after merging we can't
816 // be sure anymore that we properly preserved these live ranges and must
817 // removed these implicit operands.
818 for (const MachineOperand &MO : MI->implicit_operands()) {
819 if (!MO.isReg() || !MO.isDef() || MO.isDead())
821 assert(MO.isImplicit());
822 unsigned DefReg = MO.getReg();
824 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
826 // We can ignore cases where the super-reg is read and written.
827 if (MI->readsRegister(DefReg))
829 ImpDefs.push_back(DefReg);
834 // Attempt the merge.
835 typedef MachineBasicBlock::iterator iterator;
836 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
// The merged instruction is inserted just after the latest of the
// original instructions.
837 iterator InsertBefore = std::next(iterator(LatestMI));
838 MachineBasicBlock &MBB = *LatestMI->getParent();
839 unsigned Offset = getMemoryOpOffset(First);
840 unsigned Base = getLoadStoreBaseOp(*First).getReg();
841 bool BaseKill = LatestMI->killsRegister(Base);
842 unsigned PredReg = 0;
843 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
844 DebugLoc DL = First->getDebugLoc();
845 MachineInstr *Merged = nullptr;
// Prefer LDRD/STRD (cheaper), fall back to LDM/STM.
846 if (Cand.CanMergeToLSDouble)
847 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
848 Opcode, Pred, PredReg, DL, Regs);
849 if (!Merged && Cand.CanMergeToLSMulti)
850 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
851 Opcode, Pred, PredReg, DL, Regs);
855 // Determine earliest instruction that will get removed. We then keep an
856 // iterator just above it so the following erases don't invalidated it.
857 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
858 bool EarliestAtBegin = false;
859 if (EarliestI == MBB.begin()) {
860 EarliestAtBegin = true;
862 EarliestI = std::prev(EarliestI);
865 // Remove instructions which have been merged.
866 for (MachineInstr *MI : Cand.Instrs)
869 // Determine range between the earliest removed instruction and the new one.
871 EarliestI = MBB.begin();
873 EarliestI = std::next(EarliestI);
874 auto FixupRange = make_range(EarliestI, iterator(Merged));
876 if (isLoadSingle(Opcode)) {
877 // If the previous loads defined a super-reg, then we have to mark earlier
878 // operands undef; Replicate the super-reg def on the merged instruction.
879 for (MachineInstr &MI : FixupRange) {
880 for (unsigned &ImpDefReg : ImpDefs) {
881 for (MachineOperand &MO : MI.implicit_operands()) {
882 if (!MO.isReg() || MO.getReg() != ImpDefReg)
892 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
893 for (unsigned ImpDef : ImpDefs)
894 MIB.addReg(ImpDef, RegState::ImplicitDefine);
896 // Remove kill flags: We are possibly storing the values later now.
897 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
898 for (MachineInstr &MI : FixupRange) {
899 for (MachineOperand &MO : MI.uses()) {
900 if (!MO.isReg() || !MO.isKill())
902 if (UsedRegs.count(MO.getReg()))
906 assert(ImpDefs.empty());
/// Return true if \p Offset is a legal immediate offset for t2LDRDi8/t2STRDi8.
/// The encoding holds an 8-bit immediate that is internally multiplied by 4,
/// so the offset must be a multiple of 4 with magnitude below 1024.
static bool isValidLSDoubleOffset(int Offset) {
  // NOTE(review): abs(INT_MIN) would overflow, but offsets reaching here come
  // from small decoded immediate fields.
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}
/// Partition the offset-sorted MemOps queue into chains of consecutive
/// accesses (each next offset == previous + transfer size) and record each
/// chain as a MergeCandidate, tracking whether it may become an LDM/STM
/// (ascending/consecutive register numbers, no SP/PC, Swift/VFP odd-start
/// restriction) and/or a Thumb2 LDRD/STRD (exactly two ops, encodable
/// offset, Cortex-M3 errata check).
/// NOTE(review): the enclosing do-loop header, SIndex/Count declarations,
/// break/continue statements and the condition guarding lines 1008-1009 are
/// elided from this dump.
919 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
920 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
921 const MachineInstr *FirstMI = MemOps[0].MI;
922 unsigned Opcode = FirstMI->getOpcode();
923 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
924 unsigned Size = getLSMultipleTransferSize(FirstMI);
927 unsigned EIndex = MemOps.size();
929 // Look at the first instruction.
930 const MachineInstr *MI = MemOps[SIndex].MI;
931 int Offset = MemOps[SIndex].Offset;
932 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
933 unsigned PReg = PMO.getReg();
// Undef registers compare as UINT_MAX so they never satisfy the
// ascending-order requirement below.
934 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
935 unsigned Latest = SIndex;
936 unsigned Earliest = SIndex;
938 bool CanMergeToLSDouble =
939 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
940 // ARM errata 602117: LDRD with base in list may result in incorrect base
941 // register when interrupted or faulted.
942 if (STI->isCortexM3() && isi32Load(Opcode) &&
943 PReg == getLoadStoreBaseOp(*MI).getReg())
944 CanMergeToLSDouble = false;
946 bool CanMergeToLSMulti = true;
947 // On swift vldm/vstm starting with an odd register number as that needs
948 // more uops than single vldrs.
949 if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
950 CanMergeToLSMulti = false;
952 // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
953 // deprecated; LDM to PC is fine but cannot happen here.
954 if (PReg == ARM::SP || PReg == ARM::PC)
955 CanMergeToLSMulti = CanMergeToLSDouble = false;
957 // Merge following instructions where possible.
958 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
959 int NewOffset = MemOps[I].Offset;
960 if (NewOffset != Offset + (int)Size)
962 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
963 unsigned Reg = MO.getReg();
964 if (Reg == ARM::SP || Reg == ARM::PC)
967 // See if the current load/store may be part of a multi load/store.
968 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
969 bool PartOfLSMulti = CanMergeToLSMulti;
971 // Register numbers must be in ascending order.
972 if (RegNum <= PRegNum)
973 PartOfLSMulti = false;
974 // For VFP / NEON load/store multiples, the registers must be
975 // consecutive and within the limit on the number of registers per
977 else if (!isNotVFP && RegNum != PRegNum+1)
978 PartOfLSMulti = false;
980 // See if the current load/store may be part of a double load/store.
981 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
983 if (!PartOfLSMulti && !PartOfLSDouble)
985 CanMergeToLSMulti &= PartOfLSMulti;
986 CanMergeToLSDouble &= PartOfLSDouble;
987 // Track MemOp with latest and earliest position (Positions are
988 // counted in reverse).
989 unsigned Position = MemOps[I].Position;
990 if (Position < MemOps[Latest].Position)
992 else if (Position > MemOps[Earliest].Position)
994 // Prepare for next MemOp.
999 // Form a candidate from the Ops collected so far.
1000 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
1001 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
1002 Candidate->Instrs.push_back(MemOps[C].MI);
1003 Candidate->LatestMIIdx = Latest - SIndex;
1004 Candidate->EarliestMIIdx = Earliest - SIndex;
1005 Candidate->InsertPos = MemOps[Latest].Position;
// Presumably guarded by 'if (Count == 1)' on the elided line 1006 —
// single-instruction chains cannot be merged; confirm against full source.
1007 CanMergeToLSMulti = CanMergeToLSDouble = false;
1008 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1009 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1010 Candidates.push_back(Candidate);
1011 // Continue after the chain.
1013 } while (SIndex < EIndex);
/// Map a non-writeback load/store-multiple opcode plus addressing submode to
/// the corresponding base-register-updating (_UPD) opcode.
/// NOTE(review): the outer 'switch (Opc)'/'case' lines and closing braces
/// are elided from this dump; only the per-submode case bodies remain
/// visible.
1016 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1017 ARM_AM::AMSubMode Mode) {
1019 default: llvm_unreachable("Unhandled opcode!");
// ARM-mode LDM variants.
1025 default: llvm_unreachable("Unhandled submode!");
1026 case ARM_AM::ia: return ARM::LDMIA_UPD;
1027 case ARM_AM::ib: return ARM::LDMIB_UPD;
1028 case ARM_AM::da: return ARM::LDMDA_UPD;
1029 case ARM_AM::db: return ARM::LDMDB_UPD;
// ARM-mode STM variants.
1036 default: llvm_unreachable("Unhandled submode!");
1037 case ARM_AM::ia: return ARM::STMIA_UPD;
1038 case ARM_AM::ib: return ARM::STMIB_UPD;
1039 case ARM_AM::da: return ARM::STMDA_UPD;
1040 case ARM_AM::db: return ARM::STMDB_UPD;
// Thumb2 supports only ia and db submodes.
1045 default: llvm_unreachable("Unhandled submode!");
1046 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1047 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1052 default: llvm_unreachable("Unhandled submode!");
1053 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1054 case ARM_AM::db: return ARM::t2STMDB_UPD;
// VFP S-register and D-register variants.
1058 default: llvm_unreachable("Unhandled submode!");
1059 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1060 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1064 default: llvm_unreachable("Unhandled submode!");
1065 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1066 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1070 default: llvm_unreachable("Unhandled submode!");
1071 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1072 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1076 default: llvm_unreachable("Unhandled submode!");
1077 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1078 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1083 /// Check if the given instruction increments or decrements a register and
1084 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1085 /// generated by the instruction are possibly read as well.
1086 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
1087 ARMCC::CondCodes Pred, unsigned PredReg) {
// Scale converts the instruction's immediate into a byte amount; Thumb1
// ADD/SUB (tADDi8/tSUBi8) and SP forms encode the immediate in words, hence
// the +/-4 scale. CheckCPSRDef is set for forms whose "S" variant could set
// flags that a later reader depends on.
1090 switch (MI.getOpcode()) {
1091 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1092 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1094 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1096 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
1097 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1098 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
// Must be a same-register update (Reg := Reg +/- imm) under the same
// predicate; otherwise it is not a plain increment/decrement of Reg.
1103 if (MI.getOperand(0).getReg() != Reg ||
1104 MI.getOperand(1).getReg() != Reg ||
1105 getInstrPredicate(&MI, MIPredReg) != Pred ||
1106 MIPredReg != PredReg)
// If the instruction defines CPSR we must not fold it away — report 0.
1109 if (CheckCPSRDef && definesCPSR(&MI))
1111 return MI.getOperand(2).getImm() * Scale;
1114 /// Searches for an increment or decrement of \p Reg before \p MBBI.
1115 /// On success returns the iterator to the add/sub instruction and sets
1116 /// \p Offset to the signed byte amount; returns MBB.end() (and Offset == 0)
1117 /// when no matching update is found.
1115 static MachineBasicBlock::iterator
1116 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
1117 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1119 MachineBasicBlock &MBB = *MBBI->getParent();
1120 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1121 MachineBasicBlock::iterator EndMBBI = MBB.end();
// Nothing precedes the first instruction of the block.
1122 if (MBBI == BeginMBBI)
1125 // Skip debug values.
1126 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1127 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
// Offset == 0 means PrevMBBI is not an inc/dec of Reg (or sets live CPSR).
1130 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1131 return Offset == 0 ? EndMBBI : PrevMBBI;
1134 /// Searches for an increment or decrement of \p Reg after \p MBBI.
1135 /// On success returns the iterator to the add/sub instruction and sets
1136 /// \p Offset to the signed byte amount; returns MBB.end() (and Offset == 0)
1137 /// when no matching update is found.
1135 static MachineBasicBlock::iterator
1136 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
1137 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1139 MachineBasicBlock &MBB = *MBBI->getParent();
1140 MachineBasicBlock::iterator EndMBBI = MBB.end();
1141 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1142 // Skip debug values.
1143 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
// Fell off the end of the block — no following instruction to inspect.
1145 if (NextMBBI == EndMBBI)
1148 Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1149 return Offset == 0 ? EndMBBI : NextMBBI;
1152 /// Fold preceding/trailing inc/dec of base register into the
1153 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1155 /// stmia rn, <ra, rb, rc>
1156 /// rn := rn + 4 * 3;
1158 /// stmia rn!, <ra, rb, rc>
1160 /// rn := rn - 4 * 3;
1161 /// ldmia rn, <ra, rb, rc>
1163 /// ldmdb rn!, <ra, rb, rc>
1164 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1165 // Thumb1 is already using updating loads/stores.
1166 if (isThumb1) return false;
// Operand 0 of a non-writeback LDM/STM is the base register.
1168 const MachineOperand &BaseOP = MI->getOperand(0);
1169 unsigned Base = BaseOP.getReg();
1170 bool BaseKill = BaseOP.isKill();
1171 unsigned PredReg = 0;
1172 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1173 unsigned Opcode = MI->getOpcode();
1174 DebugLoc DL = MI->getDebugLoc();
1176 // Can't use an updating ld/st if the base register is also a dest
1177 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1178 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1179 if (MI->getOperand(i).getReg() == Base)
1182 int Bytes = getLSMultipleTransferSize(MI);
1183 MachineBasicBlock &MBB = *MI->getParent();
1184 MachineBasicBlock::iterator MBBI(MI);
// First try to fold a decrement placed just before the instruction (a
// pre-decrement turns an ia/ib op into its db/da counterpart).
1186 MachineBasicBlock::iterator MergeInstr
1187 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1188 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
1189 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1191 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
// Otherwise look for a matching increment/decrement after the instruction.
1194 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1195 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1196 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
// The add/sub is now folded into the writeback form — delete it.
1199 MBB.erase(MergeInstr);
1201 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1202 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1203 .addReg(Base, getDefRegState(true)) // WB base register
1204 .addReg(Base, getKillRegState(BaseKill))
1205 .addImm(Pred).addReg(PredReg);
1207 // Transfer the rest of operands.
1208 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1209 MIB.addOperand(MI->getOperand(OpNum));
1211 // Transfer memoperands.
1212 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// Map a single load/store opcode to its pre-indexed (writeback-before-access)
// form. VLDR/VSTR have no pre-indexed form, so they map to the single-register
// updating VLDM/VSTM variant instead, direction chosen by Mode.
// NOTE(review): the outer switch's case labels are elided in this listing.
1218 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1219 ARM_AM::AddrOpc Mode) {
1222 return ARM::LDR_PRE_IMM;
1224 return ARM::STR_PRE_IMM;
1226 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1228 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1230 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1232 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1235 return ARM::t2LDR_PRE;
1238 return ARM::t2STR_PRE;
1239 default: llvm_unreachable("Unhandled opcode!");
// Map a single load/store opcode to its post-indexed (access-then-writeback)
// form. As with the pre-indexed mapping above, VLDR/VSTR fall back to the
// single-register updating VLDM/VSTM variant, direction chosen by Mode.
// NOTE(review): the outer switch's case labels are elided in this listing.
1243 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1244 ARM_AM::AddrOpc Mode) {
1247 return ARM::LDR_POST_IMM;
1249 return ARM::STR_POST_IMM;
1251 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1253 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1255 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1257 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1260 return ARM::t2LDR_POST;
1263 return ARM::t2STR_POST;
1264 default: llvm_unreachable("Unhandled opcode!");
1268 /// Fold preceding/trailing inc/dec of base register into the
1269 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
1270 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1271 // Thumb1 doesn't have updating LDR/STR.
1272 // FIXME: Use LDM/STM with single register instead.
1273 if (isThumb1) return false;
1275 unsigned Base = getLoadStoreBaseOp(*MI).getReg();
1276 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1277 unsigned Opcode = MI->getOpcode();
1278 DebugLoc DL = MI->getDebugLoc();
// AM5 = VFP load/store addressing; AM2 = ARM-mode LDR/STR addressing.
1279 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1280 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1281 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset accesses can absorb a base update.
1282 if (isi32Load(Opcode) || isi32Store(Opcode))
1283 if (MI->getOperand(2).getImm() != 0)
1285 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1288 // Can't do the merge if the destination register is the same as the would-be
1289 // writeback register.
1290 if (MI->getOperand(0).getReg() == Base)
1293 unsigned PredReg = 0;
1294 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1295 int Bytes = getLSMultipleTransferSize(MI);
1296 MachineBasicBlock &MBB = *MI->getParent();
1297 MachineBasicBlock::iterator MBBI(MI);
// Prefer folding an add/sub that precedes the access (pre-indexed form);
// fall back to one that follows it (post-indexed form).
1299 MachineBasicBlock::iterator MergeInstr
1300 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1302 if (!isAM5 && Offset == Bytes) {
1303 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1304 } else if (Offset == -Bytes) {
1305 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1307 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1308 if (Offset == Bytes) {
1309 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1310 } else if (!isAM5 && Offset == -Bytes) {
1311 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
// The add/sub has been folded into the new opcode — remove it.
1315 MBB.erase(MergeInstr);
1317 ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
1319 bool isLd = isLoadSingle(Opcode);
1321 // VLDM[SD]_UPD, VSTM[SD]_UPD
1322 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1323 // updating load/store-multiple instructions can be used with only one
1325 MachineOperand &MO = MI->getOperand(0);
1326 BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1327 .addReg(Base, getDefRegState(true)) // WB base register
1328 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1329 .addImm(Pred).addReg(PredReg)
1330 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1331 getKillRegState(MO.isKill())));
1334 // LDR_PRE, LDR_POST
1335 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1336 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1337 .addReg(Base, RegState::Define)
1338 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// AM2 post-indexed forms encode add/sub + amount in a single immediate.
1340 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1341 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1342 .addReg(Base, RegState::Define)
1343 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1346 // t2LDR_PRE, t2LDR_POST
1347 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1348 .addReg(Base, RegState::Define)
1349 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1352 MachineOperand &MO = MI->getOperand(0);
1353 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1354 // the vestigal zero-reg offset register. When that's fixed, this clause
1355 // can be removed entirely.
1356 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1357 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1358 // STR_PRE, STR_POST
1359 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1360 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1361 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1363 // t2STR_PRE, t2STR_POST
1364 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1365 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1366 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Fold a preceding/trailing +/-8 update of the base register into a
// t2LDRDi8/t2STRDi8, producing the pre- or post-indexed t2LDRD/t2STRD form.
1374 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1375 unsigned Opcode = MI.getOpcode();
1376 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1377 "Must have t2STRDi8 or t2LDRDi8");
// Only a zero-offset doubleword access can absorb the base update.
1378 if (MI.getOperand(3).getImm() != 0)
1381 // Behaviour for writeback is undefined if base register is the same as one
1383 const MachineOperand &BaseOp = MI.getOperand(2);
1384 unsigned Base = BaseOp.getReg();
1385 const MachineOperand &Reg0Op = MI.getOperand(0);
1386 const MachineOperand &Reg1Op = MI.getOperand(1);
1387 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1391 ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
1392 MachineBasicBlock::iterator MBBI(MI);
1393 MachineBasicBlock &MBB = *MI.getParent();
// Prefer a preceding update (pre-indexed); otherwise try a trailing one
// (post-indexed). 8 == size of the doubleword transfer.
1395 MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
1398 if (Offset == 8 || Offset == -8) {
1399 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1401 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1402 if (Offset == 8 || Offset == -8) {
1403 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1407 MBB.erase(MergeInstr);
1409 DebugLoc DL = MI.getDebugLoc();
1410 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
// Loads define the base last of the reg operands; stores define it first —
// operand order differs between the LDRD and STRD writeback forms.
1411 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1412 MIB.addOperand(Reg0Op).addOperand(Reg1Op)
1413 .addReg(BaseOp.getReg(), RegState::Define);
1415 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1416 MIB.addReg(BaseOp.getReg(), RegState::Define)
1417 .addOperand(Reg0Op).addOperand(Reg1Op);
1419 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1420 .addImm(Offset).addImm(Pred).addReg(PredReg);
1421 assert(TII->get(Opcode).getNumOperands() == 6 &&
1422 TII->get(NewOpc).getNumOperands() == 7 &&
1423 "Unexpected number of operands in Opcode specification.");
1425 // Transfer implicit operands.
1426 for (const MachineOperand &MO : MI.implicit_operands())
1428 MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
1434 /// Returns true if instruction is a memory operation that this pass is capable
1435 /// of operating on.
1436 static bool isMemoryOp(const MachineInstr &MI) {
1437 unsigned Opcode = MI.getOpcode();
// The base address must be a plain register operand.
1457 if (!MI.getOperand(1).isReg())
1460 // When no memory operands are present, conservatively assume unaligned,
1461 // volatile, unfoldable.
1462 if (!MI.hasOneMemOperand())
1465 const MachineMemOperand &MMO = **MI.memoperands_begin();
1467 // Don't touch volatile memory accesses - we may be changing their order.
1468 if (MMO.isVolatile())
1471 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1473 if (MMO.getAlignment() < 4)
1476 // str <undef> could probably be eliminated entirely, but for now we just want
1477 // to avoid making a mess of it.
1478 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1479 if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
1482 // Likewise don't mess with references to undefined addresses.
1483 if (MI.getOperand(1).isUndef())
// Emit a single LDR (isDef) or STR (!isDef) at MBBI with the given register
// flags, base, offset, and predicate. Used by FixInvalidRegPairOp below to
// split an illegal LDRD/STRD into two single-word accesses.
1489 static void InsertLDR_STR(MachineBasicBlock &MBB,
1490 MachineBasicBlock::iterator &MBBI,
1491 int Offset, bool isDef,
1492 DebugLoc DL, unsigned NewOpc,
1493 unsigned Reg, bool RegDeadKill, bool RegUndef,
1494 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1495 bool OffKill, bool OffUndef,
1496 ARMCC::CondCodes Pred, unsigned PredReg,
1497 const TargetInstrInfo *TII, bool isT2) {
// Load: Reg is a def (possibly dead); store: Reg is a use (possibly
// killed/undef). Base flags are identical in both cases.
1499 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1501 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1502 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1503 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1505 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1507 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1508 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1509 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Rewrite LDRD/STRD instructions whose register pair is illegal (errata or
// non-consecutive registers) into either an LDM/STM or two single accesses.
// Returns true (after erasing the original) when a rewrite happened.
1513 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1514 MachineBasicBlock::iterator &MBBI) {
1515 MachineInstr *MI = &*MBBI;
1516 unsigned Opcode = MI->getOpcode();
1517 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1520 const MachineOperand &BaseOp = MI->getOperand(2);
1521 unsigned BaseReg = BaseOp.getReg();
1522 unsigned EvenReg = MI->getOperand(0).getReg();
1523 unsigned OddReg = MI->getOperand(1).getReg();
// Dwarf numbers are used to check consecutiveness/parity of the pair.
1524 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1525 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1527 // ARM errata 602117: LDRD with base in list may result in incorrect base
1528 // register when interrupted or faulted.
1529 bool Errata602117 = EvenReg == BaseReg &&
1530 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1531 // ARM LDRD/STRD needs consecutive registers.
1532 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1533 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
// Nothing wrong with this pair — leave the instruction alone.
1535 if (!Errata602117 && !NonConsecutiveRegs)
1538 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1539 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the pair regs are defs (dead flag); for stores they are uses
// (kill flag).
1540 bool EvenDeadKill = isLd ?
1541 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1542 bool EvenUndef = MI->getOperand(0).isUndef();
1543 bool OddDeadKill = isLd ?
1544 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1545 bool OddUndef = MI->getOperand(1).isUndef();
1546 bool BaseKill = BaseOp.isKill();
1547 bool BaseUndef = BaseOp.isUndef();
// t2LDRDi8/t2STRDi8 have no separate offset-register operand.
1548 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1549 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1550 int OffImm = getMemoryOpOffset(MI);
1551 unsigned PredReg = 0;
1552 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1554 if (OddRegNum > EvenRegNum && OffImm == 0) {
1555 // Ascending register numbers and no offset. It's safe to change it to a
1557 unsigned NewOpc = (isLd)
1558 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1559 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1561 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1562 .addReg(BaseReg, getKillRegState(BaseKill))
1563 .addImm(Pred).addReg(PredReg)
1564 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1565 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1568 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1569 .addReg(BaseReg, getKillRegState(BaseKill))
1570 .addImm(Pred).addReg(PredReg)
1572 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1574 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1578 // Split into two instructions.
1579 unsigned NewOpc = (isLd)
1580 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1581 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1582 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1583 // so adjust and use t2LDRi12 here for that.
1584 unsigned NewOpc2 = (isLd)
1585 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1586 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1587 DebugLoc dl = MBBI->getDebugLoc();
1588 // If this is a load and base register is killed, it may have been
1589 // re-defed by the load, make sure the first load does not clobber it.
1591 (BaseKill || OffKill) &&
1592 (TRI->regsOverlap(EvenReg, BaseReg))) {
1593 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the odd half first so the even half (which overlaps the base) goes
// last and cannot clobber the base before it is used.
1594 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1595 OddReg, OddDeadKill, false,
1596 BaseReg, false, BaseUndef, false, OffUndef,
1597 Pred, PredReg, TII, isT2);
1598 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1599 EvenReg, EvenDeadKill, false,
1600 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1601 Pred, PredReg, TII, isT2);
1603 if (OddReg == EvenReg && EvenDeadKill) {
1604 // If the two source operands are the same, the kill marker is
1605 // probably on the first one. e.g.
1606 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1607 EvenDeadKill = false;
1610 // Never kill the base register in the first instruction.
1611 if (EvenReg == BaseReg)
1612 EvenDeadKill = false;
1613 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1614 EvenReg, EvenDeadKill, EvenUndef,
1615 BaseReg, false, BaseUndef, false, OffUndef,
1616 Pred, PredReg, TII, isT2);
1617 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1618 OddReg, OddDeadKill, OddUndef,
1619 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1620 Pred, PredReg, TII, isT2);
// Remove the original LDRD/STRD and advance the caller's iterator.
1628 MBBI = MBB.erase(MBBI);
1632 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1633 /// incrementing offset into LDM / STM ops.
1634 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1636 unsigned CurrBase = 0;
1637 unsigned CurrOpc = ~0u;
1638 ARMCC::CondCodes CurrPred = ARMCC::AL;
1639 unsigned Position = 0;
1640 assert(Candidates.size() == 0);
1641 assert(MergeBaseCandidates.size() == 0);
1642 LiveRegsValid = false;
// Walk the block bottom-up collecting chains of same-base, same-opcode,
// same-predicate memory ops into MemOps, then form merge candidates.
1644 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1646 // The instruction in front of the iterator is the one we look at.
1647 MBBI = std::prev(I);
1648 if (FixInvalidRegPairOp(MBB, MBBI))
1652 if (isMemoryOp(*MBBI)) {
1653 unsigned Opcode = MBBI->getOpcode();
1654 const MachineOperand &MO = MBBI->getOperand(0);
1655 unsigned Reg = MO.getReg();
1656 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
1657 unsigned PredReg = 0;
1658 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1659 int Offset = getMemoryOpOffset(MBBI);
1660 if (CurrBase == 0) {
1661 // Start of a new chain.
1665 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1668 // Note: No need to match PredReg in the next if.
1669 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1671 // r4 := ldr [r0, #8]
1672 // r4 := ldr [r0, #4]
1675 // If a load overrides the base register or a register loaded by
1676 // another load in our chain, we cannot take this instruction.
1677 bool Overlap = false;
1678 if (isLoadSingle(Opcode)) {
1679 Overlap = (Base == Reg);
1681 for (const MemOpQueueEntry &E : MemOps) {
1682 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1691 // Check offset and sort memory operation into the current chain.
1692 if (Offset > MemOps.back().Offset) {
1693 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
// Insertion sort into the chain; equal offsets mean two accesses to the
// same slot, which aborts the chain.
1696 MemOpQueue::iterator MI, ME;
1697 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1698 if (Offset < MI->Offset) {
1699 // Found a place to insert.
1702 if (Offset == MI->Offset) {
1703 // Collision, abort.
1708 if (MI != MemOps.end()) {
1709 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
1716 // Don't advance the iterator; The op will start a new chain next.
1719 // Fallthrough to look into existing chain.
1720 } else if (MBBI->isDebugValue()) {
1722 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1723 MBBI->getOpcode() == ARM::t2STRDi8) {
1724 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1725 // remember them because we may still be able to merge add/sub into them.
1726 MergeBaseCandidates.push_back(MBBI);
1730 // If we are here then the chain is broken; Extract candidates for a merge.
1731 if (MemOps.size() > 0) {
1732 FormCandidates(MemOps);
1733 // Reset for the next chain.
1736 CurrPred = ARMCC::AL;
// Flush any chain still open at the top of the block.
1740 if (MemOps.size() > 0)
1741 FormCandidates(MemOps);
1743 // Sort candidates so they get processed from end to begin of the basic
1744 // block later; This is necessary for liveness calculation.
1745 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1746 return M0->InsertPos < M1->InsertPos;
1748 std::sort(Candidates.begin(), Candidates.end(), LessThan);
1750 // Go through list of candidates and merge.
1751 bool Changed = false;
1752 for (const MergeCandidate *Candidate : Candidates) {
1753 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1754 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1755 // Merge preceding/trailing base inc/dec into the merged op.
1758 unsigned Opcode = Merged->getOpcode();
1759 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
1760 MergeBaseUpdateLSDouble(*Merged);
1762 MergeBaseUpdateLSMultiple(Merged);
// Merge failed: still try folding base updates into the individual ops.
1764 for (MachineInstr *MI : Candidate->Instrs) {
1765 if (MergeBaseUpdateLoadStore(MI))
1770 assert(Candidate->Instrs.size() == 1);
1771 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1776 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
1777 for (MachineInstr *MI : MergeBaseCandidates)
1778 MergeBaseUpdateLSDouble(*MI);
1779 MergeBaseCandidates.clear();
1784 /// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1785 /// into the preceding stack restore so it directly restores the value of LR
1787 /// ldmfd sp!, {..., lr}
1790 /// ldmfd sp!, {..., lr}
1793 /// ldmfd sp!, {..., pc}
1794 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1795 // Thumb1 LDM doesn't allow high registers.
1796 if (isThumb1) return false;
1797 if (MBB.empty()) return false;
1799 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1800 if (MBBI != MBB.begin() &&
1801 (MBBI->getOpcode() == ARM::BX_RET ||
1802 MBBI->getOpcode() == ARM::tBX_RET ||
1803 MBBI->getOpcode() == ARM::MOVPCLR)) {
1804 MachineBasicBlock::iterator PrevI = std::prev(MBBI);
1805 // Ignore any DBG_VALUE instructions.
1806 while (PrevI->isDebugValue() && PrevI != MBB.begin())
1808 MachineInstr *PrevMI = PrevI;
1809 unsigned Opcode = PrevMI->getOpcode();
1810 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1811 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1812 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// The last register popped must be LR for the fold to be valid.
1813 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1814 if (MO.getReg() != ARM::LR)
1816 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1817 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1818 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
// Retarget the pop to the return form and carry over the return's
// implicit operands before the BX/MOV is removed.
1819 PrevMI->setDesc(TII->get(NewOpc));
1821 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
// Fold "tMOVr lr, <reg>; tBX_RET" into a single "tBX <reg>", killing the
// source register. Returns false when the terminator shape does not match.
1829 bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
1830 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1831 if (MBBI == MBB.begin() || MBBI == MBB.end() ||
1832 MBBI->getOpcode() != ARM::tBX_RET)
1835 MachineBasicBlock::iterator Prev = MBBI;
// The instruction before the return must be a tMOVr that defines LR.
1837 if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
1840 for (auto Use : Prev->uses())
1842 AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
1843 .addReg(Use.getReg(), RegState::Kill))
1844 .copyImplicitOps(&*MBBI);
1850 llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
// Pass entry point: cache subtarget handles, then run the three block-level
// transforms over every basic block. Returns true if anything changed.
1853 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1855 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1856 TL = STI->getTargetLowering();
1857 AFI = Fn.getInfo<ARMFunctionInfo>();
1858 TII = STI->getInstrInfo();
1859 TRI = STI->getRegisterInfo();
// Per-function caches are invalidated lazily.
1861 RegClassInfoValid = false;
1862 isThumb2 = AFI->isThumb2Function();
1863 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1865 bool Modified = false;
1866 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1868 MachineBasicBlock &MBB = *MFI;
1869 Modified |= LoadStoreMultipleOpti(MBB);
// LDMIA_RET requires v5T (BX semantics on a pop into PC).
1870 if (STI->hasV5TOps())
1871 Modified |= MergeReturnIntoLDM(MBB);
1873 Modified |= CombineMovBx(MBB);
// Release all MergeCandidate allocations made during this function.
1876 Allocator.DestroyAll();
1881 void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
1884 #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
1885 "ARM pre- register allocation load / store optimization pass"
1888 /// Pre- register allocation pass that move load / stores from consecutive
1889 /// locations close to make it more likely they will be combined later.
1890 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1892 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
1893 initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
// Cached per-function handles, set up in runOnMachineFunction.
1896 const DataLayout *TD;
1897 const TargetInstrInfo *TII;
1898 const TargetRegisterInfo *TRI;
1899 const ARMSubtarget *STI;
1900 MachineRegisterInfo *MRI;
1901 MachineFunction *MF;
1903 bool runOnMachineFunction(MachineFunction &Fn) override;
1905 const char *getPassName() const override {
1906 return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
// Helpers: CanFormLdStDWord checks whether two single ops can pair into an
// LDRD/STRD; RescheduleOps moves compatible ops together within a block.
1910 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1911 unsigned &NewOpc, unsigned &EvenReg,
1912 unsigned &OddReg, unsigned &BaseReg,
1914 unsigned &PredReg, ARMCC::CondCodes &Pred,
1916 bool RescheduleOps(MachineBasicBlock *MBB,
1917 SmallVectorImpl<MachineInstr *> &Ops,
1918 unsigned Base, bool isLd,
1919 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1920 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1922 char ARMPreAllocLoadStoreOpt::ID = 0;
1925 INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
1926 ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
// Pass entry point: cache per-function handles and reschedule loads/stores
// in every block. Returns true if anything changed.
1928 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1929 TD = &Fn.getDataLayout();
1930 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1931 TII = STI->getInstrInfo();
1932 TRI = STI->getRegisterInfo();
1933 MRI = &Fn.getRegInfo();
1936 bool Modified = false;
1937 for (MachineBasicBlock &MFI : Fn)
1938 Modified |= RescheduleLoadStoreInstrs(&MFI);
// Decide whether the memory ops in MemOps can be moved together across the
// instructions in [I, E) without changing behavior, and whether the resulting
// register-pressure increase is acceptable.
1943 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1944 MachineBasicBlock::iterator I,
1945 MachineBasicBlock::iterator E,
1946 SmallPtrSetImpl<MachineInstr*> &MemOps,
1947 SmallSet<unsigned, 4> &MemRegs,
1948 const TargetRegisterInfo *TRI) {
1949 // Are there stores / loads / calls between them?
1950 // FIXME: This is overly conservative. We should make use of alias information
1952 SmallSet<unsigned, 4> AddedRegPressure;
1954 if (I->isDebugValue() || MemOps.count(&*I))
// Calls, terminators and side-effecting instructions are hard barriers.
1956 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1958 if (isLd && I->mayStore())
1963 // It's not safe to move the first 'str' down.
1966 // str r4, [r0, #+4]
1970 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1971 MachineOperand &MO = I->getOperand(j);
1974 unsigned Reg = MO.getReg();
// Any redefinition of the base register between the ops blocks the move.
1975 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1977 if (Reg != Base && !MemRegs.count(Reg))
1978 AddedRegPressure.insert(Reg);
1982 // Estimate register pressure increase due to the transformation.
1983 if (MemRegs.size() <= 4)
1984 // Ok if we are moving small number of instructions.
1986 return AddedRegPressure.size() <= MemRegs.size() * 2;
// Check whether the two single-word ops Op0/Op1 can be combined into one
// LDRD/STRD (or t2LDRDi8/t2STRDi8). On success fills in the new opcode,
// destination/source registers, base register, encoded offset, predicate and
// debug location for the caller to build the paired instruction.
1990 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1991 DebugLoc &dl, unsigned &NewOpc,
1993 unsigned &SecondReg,
1994 unsigned &BaseReg, int &Offset,
1996 ARMCC::CondCodes &Pred,
1998 // Make sure we're allowed to generate LDRD/STRD.
1999 if (!STI->hasV5TEOps())
2002 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
2004 unsigned Opcode = Op0->getOpcode();
2005 if (Opcode == ARM::LDRi12) {
2007 } else if (Opcode == ARM::STRi12) {
2009 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2010 NewOpc = ARM::t2LDRDi8;
2013 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2014 NewOpc = ARM::t2STRDi8;
2021 // Make sure the base address satisfies i64 ld / st alignment requirement.
2022 // At the moment, we ignore the memoryoperand's value.
2023 // If we want to use AliasAnalysis, we should check it accordingly.
2024 if (!Op0->hasOneMemOperand() ||
2025 (*Op0->memoperands_begin())->isVolatile())
2028 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
2029 const Function *Func = MF->getFunction();
2030 unsigned ReqAlign = STI->hasV6Ops()
2031 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
2032 : 8; // Pre-v6 need 8-byte align
2033 if (Align < ReqAlign)
2036 // Then make sure the immediate offset fits.
2037 int OffImm = getMemoryOpOffset(Op0);
2039 int Limit = (1 << 8) * Scale;
2040 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2044 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2046 AddSub = ARM_AM::sub;
2049 int Limit = (1 << 8) * Scale;
2050 if (OffImm >= Limit || (OffImm & (Scale-1)))
// ARM-mode LDRD/STRD encode sign+magnitude via addrmode3.
2052 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// LDRD into the same register twice would be ambiguous — reject.
2054 FirstReg = Op0->getOperand(0).getReg();
2055 SecondReg = Op1->getOperand(0).getReg();
2056 if (FirstReg == SecondReg)
2058 BaseReg = Op0->getOperand(1).getReg();
2059 Pred = getInstrPredicate(Op0, PredReg);
2060 dl = Op0->getDebugLoc();
/// Reschedule the single loads/stores in \p Ops (all using base register
/// \p Base) so that consecutive-offset accesses become adjacent, making them
/// candidates for later ldm/stm formation — and, for exactly two ops, try to
/// combine them directly into an LDRD/STRD pair. \p MI2LocMap gives each
/// instruction's original position index; \p isLd selects load vs. store
/// handling. Returns true if anything was moved or rewritten.
///
/// NOTE(review): this is a numbered excerpt; gaps in the embedded original
/// line numbers mark elided source lines (closing braces, continue/break
/// statements, and some checks are not visible here).
2064 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2065 SmallVectorImpl<MachineInstr *> &Ops,
2066 unsigned Base, bool isLd,
2067 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2068 bool RetVal = false;
2070 // Sort by offset (in reverse order).
// Descending order lets the scan below walk Ops back-to-front, i.e. in
// ascending offset order. The assert relies on the caller having filtered
// out duplicate base+offset combinations.
2071 std::sort(Ops.begin(), Ops.end(),
2072 [](const MachineInstr *LHS, const MachineInstr *RHS) {
2073 int LOffset = getMemoryOpOffset(LHS);
2074 int ROffset = getMemoryOpOffset(RHS);
2075 assert(LHS == RHS || LOffset != ROffset);
2076 return LOffset > ROffset;
2079 // The loads / stores of the same base are in order. Scan them from first to
2080 // last and check for the following:
2081 // 1. Any def of base.
2083 while (Ops.size() > 1) {
// Track the earliest/latest original locations of the candidate run, so we
// can bound how far instructions would move.
2084 unsigned FirstLoc = ~0U;
2085 unsigned LastLoc = 0;
2086 MachineInstr *FirstOp = nullptr;
2087 MachineInstr *LastOp = nullptr;
2089 unsigned LastOpcode = 0;
2090 unsigned LastBytes = 0;
2091 unsigned NumMove = 0;
// Walk back-to-front (ascending offsets, per the sort above), growing a run
// of same-opcode, contiguous-offset accesses.
2092 for (int i = Ops.size() - 1; i >= 0; --i) {
2093 MachineInstr *Op = Ops[i];
2094 unsigned Loc = MI2LocMap[Op];
2095 if (Loc <= FirstLoc) {
2099 if (Loc >= LastLoc) {
// All run members must map to the same load/store-multiple opcode
// (queried with increment-after mode) to be mergeable later.
2105 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2106 if (LastOpcode && LSMOpcode != LastOpcode)
2109 int Offset = getMemoryOpOffset(Op);
2110 unsigned Bytes = getLSMultipleTransferSize(Op);
// Offsets must be exactly contiguous (previous offset + transfer size).
2112 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2115 LastOffset = Offset;
2117 LastOpcode = LSMOpcode;
// Cap the run at 8 ops to bound register pressure.
2118 if (++NumMove == 8) // FIXME: Tune this limit.
2125 SmallPtrSet<MachineInstr*, 4> MemOps;
2126 SmallSet<unsigned, 4> MemRegs;
// Collect the run's instructions and their transfer registers for the
// safety/profitability query below.
2127 for (int i = NumMove-1; i >= 0; --i) {
2128 MemOps.insert(Ops[i]);
2129 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2132 // Be conservative: if the instructions are too far apart, don't
2133 // move them. We want to limit the increase of register pressure.
2134 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2136 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2137 MemOps, MemRegs, TRI);
2139 for (unsigned i = 0; i != NumMove; ++i)
2142 // This is the new location for the loads / stores.
// Loads cluster at the earliest op, stores at the latest; then skip past
// run members and debug values so the insertion point is a real neighbor.
2143 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2144 while (InsertPos != MBB->end()
2145 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2148 // If we are moving a pair of loads / stores, see if it makes sense
2149 // to try to allocate a pair of registers that can form register pairs.
2150 MachineInstr *Op0 = Ops.back();
2151 MachineInstr *Op1 = Ops[Ops.size()-2];
2152 unsigned FirstReg = 0, SecondReg = 0;
2153 unsigned BaseReg = 0, PredReg = 0;
2154 ARMCC::CondCodes Pred = ARMCC::AL;
2156 unsigned NewOpc = 0;
// Exactly two contiguous ops may become a single LDRD/STRD; CanFormLdStDWord
// fills in the pair opcode, registers, encoded offset, and predicate.
2159 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2160 FirstReg, SecondReg, BaseReg,
2161 Offset, PredReg, Pred, isT2)) {
// Constrain both transfer registers to the class the paired opcode
// requires (e.g. an even/odd pairable class).
2165 const MCInstrDesc &MCID = TII->get(NewOpc);
2166 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2167 MRI->constrainRegClass(FirstReg, TRC);
2168 MRI->constrainRegClass(SecondReg, TRC);
2170 // Form the pair instruction.
2172 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2173 .addReg(FirstReg, RegState::Define)
2174 .addReg(SecondReg, RegState::Define)
2176 // FIXME: We're converting from LDRi12 to an insn that still
2177 // uses addrmode2, so we need an explicit offset reg. It should
2178 // always be reg0 since we're transforming LDRi12s.
2181 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Preserve both original memory operands on the merged instruction so
// alias analysis stays correct.
2182 MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
2183 DEBUG(dbgs() << "Formed " << *MIB << "\n");
// NOTE(review): elided branch boundary here — this appears to be the
// store (STRD) counterpart of the load path above; confirm against the
// full source.
2186 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2190 // FIXME: We're converting from LDRi12 to an insn that still
2191 // uses addrmode2, so we need an explicit offset reg. It should
2192 // always be reg0 since we're transforming STRi12s.
2195 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2196 MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
2197 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2204 // Add register allocation hints to form register pairs.
2205 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2206 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
// Pair formation not possible/profitable: just splice the run's
// instructions to the common insertion point so they become adjacent.
2209 for (unsigned i = 0; i != NumMove; ++i) {
2210 MachineInstr *Op = Ops.back();
2212 MBB->splice(InsertPos, MBB, Op);
2216 NumLdStMoved += NumMove;
/// Scan \p MBB for unpredicated single loads/stores, bucket them by base
/// register, and hand each bucket to RescheduleOps so contiguous accesses can
/// be clustered or paired. Calls, terminators, and repeated base+offset
/// combinations act as scheduling barriers that flush the current buckets.
/// Returns true if any rescheduling changed the block.
///
/// NOTE(review): numbered excerpt — the return-type line (original 2225) and
/// several interior lines (continue/break statements, closing braces) are
/// elided; gaps in the embedded numbers mark them.
2226 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2227 bool RetVal = false;
// Per-window bookkeeping: instruction -> ordinal position, and per-base
// lists of candidate loads and stores (with insertion-ordered base lists so
// rescheduling below is deterministic).
2229 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2230 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2231 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2232 SmallVector<unsigned, 4> LdBases;
2233 SmallVector<unsigned, 4> StBases;
2236 MachineBasicBlock::iterator MBBI = MBB->begin();
2237 MachineBasicBlock::iterator E = MBB->end();
2239 for (; MBBI != E; ++MBBI) {
2240 MachineInstr *MI = MBBI;
2241 if (MI->isCall() || MI->isTerminator()) {
2242 // Stop at barriers.
// Assign every non-debug instruction an increasing location index;
// RescheduleOps uses these to measure how far candidates are apart.
2247 if (!MI->isDebugValue())
2248 MI2LocMap[MI] = ++Loc;
2250 if (!isMemoryOp(*MI))
// Only unconditional (AL-predicated) memory ops are candidates.
2252 unsigned PredReg = 0;
2253 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2256 int Opc = MI->getOpcode();
2257 bool isLd = isLoadSingle(Opc);
2258 unsigned Base = MI->getOperand(1).getReg();
2259 int Offset = getMemoryOpOffset(MI);
2261 bool StopHere = false;
// Load path: if this base was seen before, check for a duplicate offset
// (which sets StopHere — see the barrier comment below); otherwise start
// a new bucket and remember the base.
2263 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2264 Base2LdsMap.find(Base);
2265 if (BI != Base2LdsMap.end()) {
2266 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2267 if (Offset == getMemoryOpOffset(BI->second[i])) {
2273 BI->second.push_back(MI);
2275 Base2LdsMap[Base].push_back(MI);
2276 LdBases.push_back(Base);
// Store path: mirror of the load path above.
2279 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2280 Base2StsMap.find(Base);
2281 if (BI != Base2StsMap.end()) {
2282 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2283 if (Offset == getMemoryOpOffset(BI->second[i])) {
2289 BI->second.push_back(MI);
2291 Base2StsMap[Base].push_back(MI);
2292 StBases.push_back(Base);
2297 // Found a duplicate (a base+offset combination that's seen earlier).
2304 // Re-schedule loads.
2305 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2306 unsigned Base = LdBases[i];
2307 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2309 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2312 // Re-schedule stores.
2313 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2314 unsigned Base = StBases[i];
2315 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2317 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset the buckets before continuing past the barrier that ended this
// scheduling window.
2321 Base2LdsMap.clear();
2322 Base2StsMap.clear();
2332 /// Returns an instance of the load / store optimization pass: the
2332b/// pre-register-allocation rescheduling variant when \p PreAlloc is true,
2332c/// otherwise the post-allocation ldm/stm-forming variant.
2333 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
// NOTE(review): original line 2334 is elided here; the two consecutive
// returns below only make sense with an `if (PreAlloc)` guard on the first —
// confirm against the full source.
2335 return new ARMPreAllocLoadStoreOpt();
2336 return new ARMLoadStoreOpt();