1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterClassInfo.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/LivePhysRegs.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetRegisterInfo.h"
49 #define DEBUG_TYPE "arm-ldst-opt"
// Pass statistics: counters reported via -stats, one per kind of
// load/store transformation this pass performs.
51 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
52 STATISTIC(NumSTMGened , "Number of stm instructions generated");
53 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
54 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
55 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
56 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
57 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
58 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
59 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
60 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
61 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
64 /// Post-register-allocation pass that combines load / store instructions to
65 /// form ldm / stm instructions.
// NOTE(review): the original line numbering shows gaps throughout this struct;
// some declarations appear elided in this copy (e.g. the closing braces of
// getPassName/MemOpQueueEntry and several members such as InsertPos).
66 struct ARMLoadStoreOpt : public MachineFunctionPass {
68 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target/function info, set up in runOnMachineFunction.
70 const MachineFunction *MF;
71 const TargetInstrInfo *TII;
72 const TargetRegisterInfo *TRI;
73 const MachineRegisterInfo *MRI;
74 const ARMSubtarget *STI;
75 const TargetLowering *TL;
// Liveness-tracking state used by moveLiveRegsBefore()/findFreeReg().
// LiveRegPos is the position LiveRegs currently describes (queries must
// proceed backwards within a block).
77 LivePhysRegs LiveRegs;
78 RegisterClassInfo RegClassInfo;
79 MachineBasicBlock::const_iterator LiveRegPos;
// RegClassInfo is computed lazily on first use (see findFreeReg).
81 bool RegClassInfoValid;
82 bool isThumb1, isThumb2;
84 bool runOnMachineFunction(MachineFunction &Fn) override;
86 const char *getPassName() const override {
87 return "ARM load / store optimization pass";
91 /// A set of load/store MachineInstrs with same base register sorted by
/// offset.
93 struct MemOpQueueEntry {
95 int Offset; ///< Load/Store offset.
96 unsigned Position; ///< Position as counted from end of basic block.
97 MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
98 : MI(MI), Offset(Offset), Position(Position) {}
100 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
102 /// A set of MachineInstrs that fulfill (nearly all) conditions to get
103 /// merged into a LDM/STM.
104 struct MergeCandidate {
105 /// List of instructions ordered by load/store offset.
106 SmallVector<MachineInstr*, 4> Instrs;
107 /// Index in Instrs of the instruction being latest in the schedule.
108 unsigned LatestMIIdx;
109 /// Index in Instrs of the instruction being earliest in the schedule.
110 unsigned EarliestMIIdx;
111 /// Index into the basic block where the merged instruction will be
112 /// inserted. (See MemOpQueueEntry.Position)
114 /// Whether the instructions can be merged into a ldm/stm instruction.
115 bool CanMergeToLSMulti;
116 /// Whether the instructions can be merged into a ldrd/strd instruction.
117 bool CanMergeToLSDouble;
// Candidates are bump-pointer allocated; freed wholesale when the pass ends.
119 SpecificBumpPtrAllocator<MergeCandidate> Allocator;
120 SmallVector<const MergeCandidate*,4> Candidates;
121 SmallVector<MachineInstr*,4> MergeBaseCandidates;
// Internal helpers; see the definitions below for their contracts.
123 void moveLiveRegsBefore(const MachineBasicBlock &MBB,
124 MachineBasicBlock::const_iterator Before);
125 unsigned findFreeReg(const TargetRegisterClass &RegClass);
126 void UpdateBaseRegUses(MachineBasicBlock &MBB,
127 MachineBasicBlock::iterator MBBI,
128 DebugLoc DL, unsigned Base, unsigned WordOffset,
129 ARMCC::CondCodes Pred, unsigned PredReg);
130 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
131 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
132 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
133 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
134 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
135 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
136 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
137 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
138 void FormCandidates(const MemOpQueue &MemOps);
139 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
140 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
141 MachineBasicBlock::iterator &MBBI);
142 bool MergeBaseUpdateLoadStore(MachineInstr *MI);
143 bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
144 bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
145 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
146 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification (address used as unique ID by the pass framework).
148 char ARMLoadStoreOpt::ID = 0;
// Returns true if \p MI has a live (non-dead) definition of CPSR, i.e. its
// flag results may be read later, so it cannot be folded into a load/store.
// NOTE(review): the return statements are elided in this copy (line-number
// gaps after the if); the visible condition is the only grounded logic.
151 static bool definesCPSR(const MachineInstr *MI) {
152 for (const auto &MO : MI->operands()) {
155 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
156 // If the instruction has live CPSR def, then it's not safe to fold it
157 // into load / store.
// Decodes the signed byte offset of a load/store instruction.  The raw
// immediate lives in the third-from-last operand; its interpretation depends
// on the addressing mode of the opcode (plain immediate, Thumb1 scaled-by-4,
// AM3 for LDRD/STRD, AM5 scaled-by-4 for VFP).
// NOTE(review): several return statements are elided in this copy (e.g. the
// returns for the plain-immediate and Thumb1 branches, and the final return).
164 static int getMemoryOpOffset(const MachineInstr *MI) {
165 unsigned Opcode = MI->getOpcode();
166 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
167 unsigned NumOperands = MI->getDesc().getNumOperands();
// The offset immediate is always the operand just before the two predicate
// operands.
168 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
// These opcodes encode the offset directly as a byte immediate.
170 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
171 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
172 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
173 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
176 // Thumb1 immediate offsets are scaled by 4
177 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
178 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
// Remaining cases: AM3 (LDRD/STRD) keeps a byte offset; AM5 (VFP) stores a
// word count, so scale by 4.  The add/sub bit gives the sign.
181 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
182 : ARM_AM::getAM5Offset(OffField) * 4;
183 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
184 : ARM_AM::getAM5Op(OffField);
186 if (Op == ARM_AM::sub)
// Accessors for the fixed operand layout of the single load/store
// instructions this pass handles: operand 0 is the transferred register,
// operand 1 is the base address register.
192 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
193 return MI.getOperand(1);
196 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
197 return MI.getOperand(0);
// Maps a single load/store opcode plus an addressing submode (ia/ib/da/db) to
// the corresponding load/store-multiple opcode.  Returns 0 where only a
// writeback (_UPD) variant exists (VLDM/VSTM with db).
// NOTE(review): the outer switch's case labels and braces are elided in this
// copy; only the per-mode inner switches are visible.
200 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
202 default: llvm_unreachable("Unhandled opcode!");
// ARM-mode LDR: all four submodes are available.
206 default: llvm_unreachable("Unhandled submode!");
207 case ARM_AM::ia: return ARM::LDMIA;
208 case ARM_AM::da: return ARM::LDMDA;
209 case ARM_AM::db: return ARM::LDMDB;
210 case ARM_AM::ib: return ARM::LDMIB;
// ARM-mode STR: all four submodes are available.
215 default: llvm_unreachable("Unhandled submode!");
216 case ARM_AM::ia: return ARM::STMIA;
217 case ARM_AM::da: return ARM::STMDA;
218 case ARM_AM::db: return ARM::STMDB;
219 case ARM_AM::ib: return ARM::STMIB;
223 // tLDMIA is writeback-only - unless the base register is in the input
// reglist, in which case the non-writeback form is used.
227 default: llvm_unreachable("Unhandled submode!");
228 case ARM_AM::ia: return ARM::tLDMIA;
232 // There is no non-writeback tSTMIA either.
235 default: llvm_unreachable("Unhandled submode!");
236 case ARM_AM::ia: return ARM::tSTMIA_UPD;
// Thumb2: only IA and DB submodes exist.
242 default: llvm_unreachable("Unhandled submode!");
243 case ARM_AM::ia: return ARM::t2LDMIA;
244 case ARM_AM::db: return ARM::t2LDMDB;
250 default: llvm_unreachable("Unhandled submode!");
251 case ARM_AM::ia: return ARM::t2STMIA;
252 case ARM_AM::db: return ARM::t2STMDB;
// VFP single-precision.
257 default: llvm_unreachable("Unhandled submode!");
258 case ARM_AM::ia: return ARM::VLDMSIA;
259 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
264 default: llvm_unreachable("Unhandled submode!");
265 case ARM_AM::ia: return ARM::VSTMSIA;
266 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
// VFP double-precision.
271 default: llvm_unreachable("Unhandled submode!");
272 case ARM_AM::ia: return ARM::VLDMDIA;
273 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
278 default: llvm_unreachable("Unhandled submode!");
279 case ARM_AM::ia: return ARM::VSTMDIA;
280 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
// Inverse of getLoadStoreMultipleOpcode: recovers the addressing submode
// (ia/ib/da/db) from a load/store-multiple opcode.
// NOTE(review): the grouped return statements and many case labels are elided
// in this copy; the visible cases fall into an IA group and a DB group.
285 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
287 default: llvm_unreachable("Unhandled opcode!");
// IA-mode opcodes (plain and writeback variants).
294 case ARM::tLDMIA_UPD:
295 case ARM::tSTMIA_UPD:
296 case ARM::t2LDMIA_RET:
298 case ARM::t2LDMIA_UPD:
300 case ARM::t2STMIA_UPD:
302 case ARM::VLDMSIA_UPD:
304 case ARM::VSTMSIA_UPD:
306 case ARM::VLDMDIA_UPD:
308 case ARM::VSTMDIA_UPD:
// DB-mode opcodes (plain and writeback variants).
322 case ARM::t2LDMDB_UPD:
324 case ARM::t2STMDB_UPD:
325 case ARM::VLDMSDB_UPD:
326 case ARM::VSTMSDB_UPD:
327 case ARM::VLDMDDB_UPD:
328 case ARM::VSTMDDB_UPD:
// Opcode classification predicates: identify the 32-bit integer load/store
// forms (per ISA flavor) and single loads (integer or VFP) that this pass
// can merge.
339 static bool isT1i32Load(unsigned Opc) {
340 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
343 static bool isT2i32Load(unsigned Opc) {
344 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// Any 32-bit integer load: ARM, Thumb1 or Thumb2 immediate form.
347 static bool isi32Load(unsigned Opc) {
348 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
351 static bool isT1i32Store(unsigned Opc) {
352 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
355 static bool isT2i32Store(unsigned Opc) {
356 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// Any 32-bit integer store: ARM, Thumb1 or Thumb2 immediate form.
359 static bool isi32Store(unsigned Opc) {
360 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
// A single (non-multiple) load: integer or VFP S/D register load.
363 static bool isLoadSingle(unsigned Opc) {
364 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
// getImmScale: scale factor applied to the immediate offset of a Thumb1
// load/store opcode.
// NOTE(review): all case labels/returns of this switch are elided in this
// copy; in upstream LLVM the scale is 4 for word, 2 for halfword and 1 for
// byte accesses — TODO confirm against the full file.
367 static unsigned getImmScale(unsigned Opc) {
369 default: llvm_unreachable("Unhandled opcode!");
// getLSMultipleTransferSize: number of bytes transferred by \p MI.  For
// load/store-multiple opcodes this is (number of register-list operands) *
// register size (4 for GPR/S registers, 8 for D registers).
// NOTE(review): most case labels and the single-load returns are elided.
384 static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
385 switch (MI->getOpcode()) {
412 case ARM::tLDMIA_UPD:
413 case ARM::tSTMIA_UPD:
// Register list length = total operands minus the fixed (described) ones,
// +1 because the list's first register is itself a described operand.
420 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
423 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
427 /// Update future uses of the base register with the offset introduced
428 /// due to writeback. This function only works on Thumb1.
///
/// After a Thumb1 LDM/STM with writeback, \p Base has been advanced by
/// WordOffset*4 bytes; this walks forward from \p MBBI folding that delta
/// into immediate-offset users, or inserts a SUBS to reset the base where
/// folding is impossible.
/// NOTE(review): several statements (returns, assignments such as the
/// InsertSub updates, and closing braces) are elided in this copy.
430 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
431 MachineBasicBlock::iterator MBBI,
432 DebugLoc DL, unsigned Base,
434 ARMCC::CondCodes Pred, unsigned PredReg) {
435 assert(isThumb1 && "Can only update base register uses for Thumb1!");
436 // Start updating any instructions with immediate offsets. Insert a SUB before
437 // the first non-updateable instruction (if any).
438 for (; MBBI != MBB.end(); ++MBBI) {
439 bool InsertSub = false;
440 unsigned Opc = MBBI->getOpcode();
442 if (MBBI->readsRegister(Base)) {
// Immediate-offset loads/stores using Base can absorb the delta directly.
445 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
447 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
449 if (IsLoad || IsStore) {
450 // Loads and stores with immediate offsets can be updated, but only if
451 // the new offset isn't negative.
452 // The MachineOperand containing the offset immediate is the last one
453 // before predicates.
455 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
456 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
457 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
459 // If storing the base register, it needs to be reset first.
460 unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
462 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
467 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
468 !definesCPSR(MBBI)) {
469 // SUBS/ADDS using this register, with a dead def of the CPSR.
470 // Merge it with the update; if the merged offset is too large,
471 // insert a new sub instead.
473 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
474 Offset = (Opc == ARM::tSUBi8) ?
475 MO.getImm() + WordOffset * 4 :
476 MO.getImm() - WordOffset * 4 ;
477 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
478 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
// Offset == 0.
481 // The base register has now been reset, so exit early.
488 // Can't update the instruction.
492 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
493 // Since SUBS sets the condition flags, we can't place the base reset
494 // after an instruction that has a live CPSR def.
495 // The base register might also contain an argument for a function call.
500 // An instruction above couldn't be updated, so insert a sub.
501 AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
502 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
// Stop once the base register's value is killed or redefined downstream.
506 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
507 // Register got killed. Stop updating.
511 // End of block was reached.
512 if (MBB.succ_size() > 0) {
513 // FIXME: Because of a bug, live registers are sometimes missing from
514 // the successor blocks' live-in sets. This means we can't trust that
515 // information and *always* have to reset at the end of a block.
517 if (MBBI != MBB.end()) --MBBI;
519 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
520 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
524 /// Return the first register of class \p RegClass that is not in \p Regs.
/// Uses the cached allocation order (computed lazily on first call) and the
/// current LiveRegs set; callers must have positioned LiveRegs via
/// moveLiveRegsBefore() first.
/// NOTE(review): the "register found" and fallthrough return statements are
/// elided in this copy.
525 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
526 if (!RegClassInfoValid) {
527 RegClassInfo.runOnMachineFunction(*MF);
528 RegClassInfoValid = true;
531 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
532 if (!LiveRegs.contains(Reg))
537 /// Compute live registers just before instruction \p Before (in normal schedule
538 /// direction). Computes backwards so multiple queries in the same block must
539 /// come in reverse order.
540 void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
541 MachineBasicBlock::const_iterator Before) {
542 // Initialize if we never queried in this block.
543 if (!LiveRegsValid) {
// Seed with the block's live-outs and walk backwards from the end.
545 LiveRegs.addLiveOuts(&MBB, true);
546 LiveRegPos = MBB.end();
547 LiveRegsValid = true;
549 // Move backward just before the "Before" position.
550 while (LiveRegPos != Before) {
// Step over one instruction at a time, updating liveness.
// NOTE(review): the decrement of LiveRegPos is elided in this copy.
552 LiveRegs.stepBackward(*LiveRegPos);
// Returns true if register \p Reg appears as the first element of any pair in
// \p Regs.  NOTE(review): the comparison/return inside the loop is elided in
// this copy.
556 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
558 for (const std::pair<unsigned, bool> &R : Regs)
564 /// Create and insert a LDM or STM with Base as base register and registers in
565 /// Regs as the register operands that would be loaded / stored.  Returns the
566 /// newly created instruction, or nullptr if no transformation was done.
/// NOTE(review): the original line numbering shows gaps throughout this
/// function; a number of statements (mode assignments, early returns, else
/// branches and closing braces) are elided in this copy.
567 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
568 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
569 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
570 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
571 unsigned NumRegs = Regs.size();
574 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
575 // Compute liveness information for that register to make the decision.
576 bool SafeToClobberCPSR = !isThumb1 ||
577 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
578 MachineBasicBlock::LQR_Dead);
580 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
582 // Exception: If the base register is in the input reglist, Thumb1 LDM is
// non-writeback.
584 // It's also not possible to merge an STR of the base register in Thumb1.
585 if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
586 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
587 if (Opcode == ARM::tLDRi) {
589 } else if (Opcode == ARM::tSTRi) {
// Select the addressing submode from the starting offset.
594 ARM_AM::AMSubMode Mode = ARM_AM::ia;
595 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
596 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
597 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
599 if (Offset == 4 && haveIBAndDA) {
601 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
603 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
604 // VLDM/VSTM do not support DB mode without also updating the base reg.
606 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
607 // Check if this is a supported opcode before inserting instructions to
608 // calculate a new base register.
609 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
611 // If starting offset isn't zero, insert a MI to materialize a new base.
612 // But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
617 // On Thumb1, it's not worth materializing a new base register without
618 // clobbering the CPSR (i.e. not using ADDS/SUBS).
619 if (!SafeToClobberCPSR)
// Pick a register to hold the rebased address.
623 if (isi32Load(Opcode)) {
624 // If it is a load, then just use one of the destination register to
625 // use as the new base.
626 NewBase = Regs[NumRegs-1].first;
628 // Find a free register that we can use as scratch register.
629 moveLiveRegsBefore(MBB, InsertBefore);
630 // The merged instruction does not exist yet but will use several Regs if
// it is a store.
632 if (!isLoadSingle(Opcode))
633 for (const std::pair<unsigned, bool> &R : Regs)
634 LiveRegs.addReg(R.first);
636 NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
// Choose the add/sub opcode matching the ISA flavor and immediate range.
642 isThumb2 ? ARM::t2ADDri :
643 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
644 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
645 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
650 isThumb2 ? ARM::t2SUBri :
651 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
652 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
655 if (!TL->isLegalAddImmediate(Offset))
656 // FIXME: Try add with register operand?
657 return nullptr; // Probably not worth it then.
659 // We can only append a kill flag to the add/sub input if the value is not
660 // used in the register list of the stm as well.
661 bool KillOldBase = BaseKill &&
662 (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
665 // Thumb1: depending on immediate size, use either
666 // ADDS NewBase, Base, #imm3
// or, after a MOV to NewBase,
669 // ADDS NewBase, #imm8.
670 if (Base != NewBase &&
671 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
672 // Need to insert a MOV to the new base first.
673 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
675 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
676 if (Pred != ARMCC::AL)
678 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
679 .addReg(Base, getKillRegState(KillOldBase));
681 BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
682 .addReg(Base, getKillRegState(KillOldBase))
683 .addImm(Pred).addReg(PredReg);
685 // The following ADDS/SUBS becomes an update.
// Emit the base-materializing add/sub itself.
689 if (BaseOpc == ARM::tADDrSPi) {
690 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
691 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
692 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
693 .addImm(Pred).addReg(PredReg);
696 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
697 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
698 .addImm(Pred).addReg(PredReg);
700 BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
701 .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
702 .addImm(Pred).addReg(PredReg).addReg(0);
705 BaseKill = true; // New base is always killed straight away.
// From here on, build the actual LDM/STM instruction.
708 bool isDef = isLoadSingle(Opcode);
710 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
711 // base register writeback.
712 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
716 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
717 // - There is no writeback (LDM of base register),
718 // - the base register is killed by the merged instruction,
719 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
720 // to reset the base register.
721 // Otherwise, don't merge.
722 // It's safe to return here since the code to materialize a new base register
723 // above is also conditional on SafeToClobberCPSR.
724 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
727 MachineInstrBuilder MIB;
730 if (Opcode == ARM::tLDMIA)
731 // Update tLDMIA with writeback if necessary.
732 Opcode = ARM::tLDMIA_UPD;
734 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
736 // Thumb1: we might need to set base writeback when building the MI.
737 MIB.addReg(Base, getDefRegState(true))
738 .addReg(Base, getKillRegState(BaseKill));
740 // The base isn't dead after a merged instruction with writeback.
741 // Insert a sub instruction after the newly formed instruction to reset.
743 UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
746 // No writeback, simply build the MachineInstr.
747 MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
748 MIB.addReg(Base, getKillRegState(BaseKill));
// Predicate operands, then the register list.
751 MIB.addImm(Pred).addReg(PredReg);
753 for (const std::pair<unsigned, bool> &R : Regs)
754 MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
756 return MIB.getInstr();
// Create and insert a t2LDRDi8/t2STRDi8 for the two registers in \p Regs,
// addressed as [Base, #Offset] with the given predicate.  Returns the new
// instruction.
// NOTE(review): the if/else heads around the Define vs. kill-state operand
// groups are elided in this copy (loads add both regs as defs, stores as
// uses).
759 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
760 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
761 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
762 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
763 bool IsLoad = isi32Load(Opcode);
764 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
765 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
767 assert(Regs.size() == 2);
768 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
769 TII->get(LoadStoreOpcode));
// Load: both registers are definitions.
771 MIB.addReg(Regs[0].first, RegState::Define)
772 .addReg(Regs[1].first, RegState::Define);
// Store: both registers are uses, carrying their kill flags.
774 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
775 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
777 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
778 return MIB.getInstr();
781 /// Attempt to merge the instructions of \p Cand into a single LDM/STM or
/// LDRD/STRD, then erase the originals and fix up register flags.  Returns
/// the merged instruction, or nullptr if the merge was not performed.
/// NOTE(review): the original line numbering shows gaps; several statements
/// (continues, early returns, kill-flag clearing, closing braces) are elided
/// in this copy.
782 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
783 const MachineInstr *First = Cand.Instrs.front();
784 unsigned Opcode = First->getOpcode();
785 bool IsLoad = isLoadSingle(Opcode);
786 SmallVector<std::pair<unsigned, bool>, 8> Regs;
787 SmallVector<unsigned, 4> ImpDefs;
788 DenseSet<unsigned> KilledRegs;
789 // Determine list of registers and list of implicit super-register defs.
790 for (const MachineInstr *MI : Cand.Instrs) {
791 const MachineOperand &MO = getLoadStoreRegOp(*MI);
792 unsigned Reg = MO.getReg();
793 bool IsKill = MO.isKill();
795 KilledRegs.insert(Reg);
796 Regs.push_back(std::make_pair(Reg, IsKill));
799 // Collect any implicit defs of super-registers, after merging we can't
800 // be sure anymore that we properly preserved these live ranges and must
801 // removed these implicit operands.
802 for (const MachineOperand &MO : MI->implicit_operands()) {
803 if (!MO.isReg() || !MO.isDef() || MO.isDead())
805 assert(MO.isImplicit());
806 unsigned DefReg = MO.getReg();
// Avoid duplicates in ImpDefs.
808 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
810 // We can ignore cases where the super-reg is read and written.
811 if (MI->readsRegister(DefReg))
813 ImpDefs.push_back(DefReg);
818 // Attempt the merge.
819 typedef MachineBasicBlock::iterator iterator;
820 MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
// Insert just after the latest instruction in schedule order.
821 iterator InsertBefore = std::next(iterator(LatestMI));
822 MachineBasicBlock &MBB = *LatestMI->getParent();
823 unsigned Offset = getMemoryOpOffset(First);
824 unsigned Base = getLoadStoreBaseOp(*First).getReg();
825 bool BaseKill = LatestMI->killsRegister(Base);
826 unsigned PredReg = 0;
827 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
828 DebugLoc DL = First->getDebugLoc();
829 MachineInstr *Merged = nullptr;
// Prefer LDRD/STRD when possible, fall back to LDM/STM.
830 if (Cand.CanMergeToLSDouble)
831 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
832 Opcode, Pred, PredReg, DL, Regs);
833 if (!Merged && Cand.CanMergeToLSMulti)
834 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
835 Opcode, Pred, PredReg, DL, Regs);
839 // Determine earliest instruction that will get removed. We then keep an
840 // iterator just above it so the following erases don't invalidated it.
841 iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
842 bool EarliestAtBegin = false;
843 if (EarliestI == MBB.begin()) {
844 EarliestAtBegin = true;
846 EarliestI = std::prev(EarliestI);
849 // Remove instructions which have been merged.
850 for (MachineInstr *MI : Cand.Instrs)
// (erase call elided in this copy)
853 // Determine range between the earliest removed instruction and the new one.
855 EarliestI = MBB.begin();
857 EarliestI = std::next(EarliestI);
858 auto FixupRange = make_range(EarliestI, iterator(Merged));
860 if (isLoadSingle(Opcode)) {
861 // If the previous loads defined a super-reg, then we have to mark earlier
862 // operands undef; Replicate the super-reg def on the merged instruction.
863 for (MachineInstr &MI : FixupRange) {
864 for (unsigned &ImpDefReg : ImpDefs) {
865 for (MachineOperand &MO : MI.implicit_operands()) {
866 if (!MO.isReg() || MO.getReg() != ImpDefReg)
// (setIsUndef / clearing logic elided in this copy)
// Re-attach the collected super-register implicit defs to the merged MI.
876 MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
877 for (unsigned ImpDef : ImpDefs)
878 MIB.addReg(ImpDef, RegState::ImplicitDefine);
880 // Remove kill flags: We are possibly storing the values later now.
881 assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
882 for (MachineInstr &MI : FixupRange) {
883 for (MachineOperand &MO : MI.uses()) {
884 if (!MO.isReg() || !MO.isKill())
886 if (KilledRegs.count(MO.getReg()))
// Stores collect no implicit super-reg defs.
890 assert(ImpDefs.empty());
// Returns true if \p Offset is encodable in a t2LDRDi8/t2STRDi8: a multiple
// of 4 with magnitude below 1024.
896 static bool isValidLSDoubleOffset(int Offset) {
897 unsigned Value = abs(Offset);
898 // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
// multiplied by 4, giving a +-1020 byte range in steps of 4.
900 return (Value % 4) == 0 && Value < 1024;
903 /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
///
/// Walks the offset-sorted MemOps, greedily extending each run of
/// consecutive-offset operations into a MergeCandidate, recording whether
/// the run qualifies as an LDM/STM and/or an LDRD/STRD.
/// NOTE(review): the line numbering shows gaps; the outer do-loop head, the
/// SIndex/Count initialization, several break/assignment statements and the
/// limit checks are elided in this copy.
904 void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
905 const MachineInstr *FirstMI = MemOps[0].MI;
906 unsigned Opcode = FirstMI->getOpcode();
907 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
908 unsigned Size = getLSMultipleTransferSize(FirstMI);
909 // vldm / vstm limit are 32 for S variants, 16 for D variants.
930 unsigned EIndex = MemOps.size();
932 // Look at the first instruction.
933 const MachineInstr *MI = MemOps[SIndex].MI;
934 int Offset = MemOps[SIndex].Offset;
935 const MachineOperand &PMO = getLoadStoreRegOp(*MI);
936 unsigned PReg = PMO.getReg();
// Undef operands sort last (UINT_MAX) so they never satisfy the ascending
// register-number requirement below.
937 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
938 unsigned Latest = SIndex;
939 unsigned Earliest = SIndex;
941 bool CanMergeToLSDouble =
942 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
943 // ARM errata 602117: LDRD with base in list may result in incorrect base
944 // register when interrupted or faulted.
945 if (STI->isCortexM3() && isi32Load(Opcode) &&
946 PReg == getLoadStoreBaseOp(*MI).getReg())
947 CanMergeToLSDouble = false;
949 bool CanMergeToLSMulti = true;
950 // On swift vldm/vstm starting with an odd register number as that needs
951 // more uops than single vldrs.
952 if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
953 CanMergeToLSMulti = false;
955 // Merge following instructions where possible.
956 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
957 int NewOffset = MemOps[I].Offset;
// Offsets must be contiguous (each Size bytes after the previous).
958 if (NewOffset != Offset + (int)Size)
960 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
961 unsigned Reg = MO.getReg();
962 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
964 // See if the current load/store may be part of a multi load/store.
965 bool PartOfLSMulti = CanMergeToLSMulti;
967 // Cannot load from SP
969 PartOfLSMulti = false;
970 // Register numbers must be in ascending order.
971 else if (RegNum <= PRegNum)
972 PartOfLSMulti = false;
973 // For VFP / NEON load/store multiples, the registers must be
974 // consecutive and within the limit on the number of registers per
// instruction.
976 else if (!isNotVFP && RegNum != PRegNum+1)
977 PartOfLSMulti = false;
979 // See if the current load/store may be part of a double load/store.
// LDRD/STRD handle exactly two registers, so only the second op qualifies.
980 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
982 if (!PartOfLSMulti && !PartOfLSDouble)
984 CanMergeToLSMulti &= PartOfLSMulti;
985 CanMergeToLSDouble &= PartOfLSDouble;
986 // Track MemOp with latest and earliest position (Positions are
987 // counted in reverse).
988 unsigned Position = MemOps[I].Position;
989 if (Position < MemOps[Latest].Position)
991 else if (Position > MemOps[Earliest].Position)
993 // Prepare for next MemOp.
998 // Form a candidate from the Ops collected so far.
999 MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
1000 for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
1001 Candidate->Instrs.push_back(MemOps[C].MI);
1002 Candidate->LatestMIIdx = Latest - SIndex;
1003 Candidate->EarliestMIIdx = Earliest - SIndex;
1004 Candidate->InsertPos = MemOps[Latest].Position;
// A single instruction is kept as a candidate (for base-update merging)
// but cannot become a multiple/double.
1006 CanMergeToLSMulti = CanMergeToLSDouble = false;
1007 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1008 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
1009 Candidates.push_back(Candidate);
1010 // Continue after the chain.
1012 } while (SIndex < EIndex);
// Maps a load/store-multiple opcode plus submode to its writeback (_UPD)
// variant.  NOTE(review): the outer switch's case labels/braces are elided in
// this copy; only the per-mode inner switches are visible.
1015 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1016 ARM_AM::AMSubMode Mode) {
1018 default: llvm_unreachable("Unhandled opcode!");
// ARM-mode LDM: all four submodes.
1024 default: llvm_unreachable("Unhandled submode!");
1025 case ARM_AM::ia: return ARM::LDMIA_UPD;
1026 case ARM_AM::ib: return ARM::LDMIB_UPD;
1027 case ARM_AM::da: return ARM::LDMDA_UPD;
1028 case ARM_AM::db: return ARM::LDMDB_UPD;
// ARM-mode STM: all four submodes.
1035 default: llvm_unreachable("Unhandled submode!");
1036 case ARM_AM::ia: return ARM::STMIA_UPD;
1037 case ARM_AM::ib: return ARM::STMIB_UPD;
1038 case ARM_AM::da: return ARM::STMDA_UPD;
1039 case ARM_AM::db: return ARM::STMDB_UPD;
// Thumb2: only IA and DB.
1044 default: llvm_unreachable("Unhandled submode!");
1045 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1046 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1051 default: llvm_unreachable("Unhandled submode!");
1052 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1053 case ARM_AM::db: return ARM::t2STMDB_UPD;
// VFP single-precision.
1057 default: llvm_unreachable("Unhandled submode!");
1058 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1059 case ARM_AM::db: return ARM::VLDMSDB_UPD;
// VFP double-precision.
1063 default: llvm_unreachable("Unhandled submode!");
1064 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1065 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1069 default: llvm_unreachable("Unhandled submode!");
1070 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1071 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1075 default: llvm_unreachable("Unhandled submode!");
1076 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1077 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1082 /// Check if the given instruction increments or decrements a register and
1083 /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
1084 /// generated by the instruction are possibly read as well.
/// The instruction must match the form "Reg = Reg +/- imm" under the same
/// predicate as the candidate load/store; otherwise 0 is returned.
/// NOTE(review): the local declarations (Scale, CheckCPSRDef, MIPredReg), the
/// t2ADDri/t2SUBri cases, the default case and the zero returns are elided in
/// this copy.
1085 static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
1086 ARMCC::CondCodes Pred, unsigned PredReg) {
1089 switch (MI.getOpcode()) {
// Thumb1 add/sub immediates are in words, hence the +-4 scale; they always
// set flags, so a live CPSR def must be ruled out below.
1090 case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
1091 case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
1093 case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
1095 case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
// SP-relative Thumb1 forms never touch the flags.
1096 case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
1097 case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
// Must read and write exactly \p Reg, under the same predicate.
1102 if (MI.getOperand(0).getReg() != Reg ||
1103 MI.getOperand(1).getReg() != Reg ||
1104 getInstrPredicate(&MI, MIPredReg) != Pred ||
1105 MIPredReg != PredReg)
1108 if (CheckCPSRDef && definesCPSR(&MI))
1110 return MI.getOperand(2).getImm() * Scale;
1113 /// Searches for an increment or decrement of \p Reg before \p MBBI.
// On success returns the iterator to the add/sub instruction and sets
// \p Offset to its signed byte delta; returns MBB.end() (with Offset == 0)
// when no suitable instruction precedes \p MBBI.
1114 static MachineBasicBlock::iterator
1115 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
1116 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1118 MachineBasicBlock &MBB = *MBBI->getParent();
1119 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1120 MachineBasicBlock::iterator EndMBBI = MBB.end();
// Nothing before the first instruction of the block.
1121 if (MBBI == BeginMBBI)
1124 // Skip debug values.
1125 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1126 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
1129 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
// Offset == 0 means "not an inc/dec of Reg"; signal failure with end().
1130 return Offset == 0 ? EndMBBI : PrevMBBI;
1133 /// Searches for a increment or decrement of \p Reg after \p MBBI.
// Mirror of findIncDecBefore: scans forward past debug values and returns
// MBB.end() (with Offset == 0) when the next real instruction is not a
// matching add/sub of \p Reg.
1134 static MachineBasicBlock::iterator
1135 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
1136 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1138 MachineBasicBlock &MBB = *MBBI->getParent();
1139 MachineBasicBlock::iterator EndMBBI = MBB.end();
1140 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1141 // Skip debug values.
1142 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1144 if (NextMBBI == EndMBBI)
1147 Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1148 return Offset == 0 ? EndMBBI : NextMBBI;
1151 /// Fold proceeding/trailing inc/dec of base register into the
1152 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1154 /// stmia rn, <ra, rb, rc>
1155 /// rn := rn + 4 * 3;
1157 /// stmia rn!, <ra, rb, rc>
1159 /// rn := rn - 4 * 3;
1160 /// ldmia rn, <ra, rb, rc>
1162 /// ldmdb rn!, <ra, rb, rc>
1163 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1164 // Thumb1 is already using updating loads/stores.
1165 if (isThumb1) return false;
// Operand 0 of a non-writeback LDM/STM is the base register.
1167 const MachineOperand &BaseOP = MI->getOperand(0);
1168 unsigned Base = BaseOP.getReg();
1169 bool BaseKill = BaseOP.isKill();
1170 unsigned PredReg = 0;
1171 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1172 unsigned Opcode = MI->getOpcode();
1173 DebugLoc DL = MI->getDebugLoc();
1175 // Can't use an updating ld/st if the base register is also a dest
1176 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1177 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1178 if (MI->getOperand(i).getReg() == Base)
1181 int Bytes = getLSMultipleTransferSize(MI);
1182 MachineBasicBlock &MBB = *MI->getParent();
1183 MachineBasicBlock::iterator MBBI(MI);
// First try to fold a decrement placed immediately before the memory op
// (e.g. "sub rn, #Bytes; ldmia rn" becomes a pre-decrement form).
1185 MachineBasicBlock::iterator MergeInstr
1186 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1187 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
1188 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1190 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
// Otherwise try an increment/decrement immediately after the memory op;
// the delta must exactly match the transfer size for the submode.
1193 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1194 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1195 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
// The add/sub is now encoded in the writeback; delete it.
1198 MBB.erase(MergeInstr);
1200 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1201 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1202 .addReg(Base, getDefRegState(true)) // WB base register
1203 .addReg(Base, getKillRegState(BaseKill))
1204 .addImm(Pred).addReg(PredReg);
1206 // Transfer the rest of operands.
1207 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1208 MIB.addOperand(MI->getOperand(OpNum));
1210 // Transfer memoperands.
1211 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// Map a single load/store opcode to its pre-indexed (base updated before the
// access) counterpart. For VFP VLDR/VSTR there is no pre-indexed form, so a
// one-register writeback VLDM/VSTM is used instead, picked by Mode (add->IA,
// sub->DB). NOTE(review): the case labels of the enclosing switch are missing
// from this extract.
1217 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1218 ARM_AM::AddrOpc Mode) {
1221 return ARM::LDR_PRE_IMM;
1223 return ARM::STR_PRE_IMM;
1225 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1227 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1229 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1231 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1234 return ARM::t2LDR_PRE;
1237 return ARM::t2STR_PRE;
1238 default: llvm_unreachable("Unhandled opcode!");
// Map a single load/store opcode to its post-indexed (base updated after the
// access) counterpart. As with the pre-indexed mapping, VFP ops fall back to
// one-register writeback VLDM/VSTM forms selected by Mode.
1242 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1243 ARM_AM::AddrOpc Mode) {
1246 return ARM::LDR_POST_IMM;
1248 return ARM::STR_POST_IMM;
1250 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1252 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1254 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1256 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1259 return ARM::t2LDR_POST;
1262 return ARM::t2STR_POST;
1263 default: llvm_unreachable("Unhandled opcode!");
1267 /// Fold proceeding/trailing inc/dec of base register into the
1268 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
// Turns "add/sub rn; ldr rd, [rn]" into a pre-indexed access, or
// "ldr rd, [rn]; add/sub rn" into a post-indexed one, erasing the add/sub.
1269 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1270 // Thumb1 doesn't have updating LDR/STR.
1271 // FIXME: Use LDM/STM with single register instead.
1272 if (isThumb1) return false;
1274 unsigned Base = getLoadStoreBaseOp(*MI).getReg();
1275 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1276 unsigned Opcode = MI->getOpcode();
1277 DebugLoc DL = MI->getDebugLoc();
// isAM5: VFP addressing mode; isAM2: ARM-mode LDR/STR immediate mode.
1278 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1279 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1280 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset accesses can absorb the base update.
1281 if (isi32Load(Opcode) || isi32Store(Opcode))
1282 if (MI->getOperand(2).getImm() != 0)
1284 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1287 // Can't do the merge if the destination register is the same as the would-be
1288 // writeback register.
1289 if (MI->getOperand(0).getReg() == Base)
1292 unsigned PredReg = 0;
1293 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1294 int Bytes = getLSMultipleTransferSize(MI);
1295 MachineBasicBlock &MBB = *MI->getParent();
1296 MachineBasicBlock::iterator MBBI(MI);
// Prefer a preceding add/sub (pre-indexed form) ...
1298 MachineBasicBlock::iterator MergeInstr
1299 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
// VFP (AM5) has no pre-increment form via VLDM/VSTM IA, hence !isAM5 here.
1301 if (!isAM5 && Offset == Bytes) {
1302 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1303 } else if (Offset == -Bytes) {
1304 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
// ... otherwise fall back to a trailing add/sub (post-indexed form).
1306 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1307 if (Offset == Bytes) {
1308 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1309 } else if (!isAM5 && Offset == -Bytes) {
1310 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
// The update is now part of the new opcode; remove the add/sub.
1314 MBB.erase(MergeInstr);
1316 ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
1318 bool isLd = isLoadSingle(Opcode);
1320 // VLDM[SD]_UPD, VSTM[SD]_UPD
1321 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1322 // updating load/store-multiple instructions can be used with only one
1324 MachineOperand &MO = MI->getOperand(0);
1325 BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1326 .addReg(Base, getDefRegState(true)) // WB base register
1327 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1328 .addImm(Pred).addReg(PredReg)
1329 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1330 getKillRegState(MO.isKill())));
1333 // LDR_PRE, LDR_POST
1334 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1335 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1336 .addReg(Base, RegState::Define)
1337 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// LDR_POST_IMM still uses addrmode2, which encodes add/sub + amount.
1339 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1340 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1341 .addReg(Base, RegState::Define)
1342 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1345 // t2LDR_PRE, t2LDR_POST
1346 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1347 .addReg(Base, RegState::Define)
1348 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1351 MachineOperand &MO = MI->getOperand(0);
1352 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1353 // the vestigal zero-reg offset register. When that's fixed, this clause
1354 // can be removed entirely.
1355 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1356 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1357 // STR_PRE, STR_POST
1358 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1359 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1360 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1362 // t2STR_PRE, t2STR_POST
1363 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1364 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1365 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Fold a preceding/trailing inc/dec of the base register into a Thumb2
// LDRD/STRD, producing the pre- or post-indexed (_PRE/_POST) form. The
// matching delta is 8 bytes (two 32-bit registers).
1373 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1374 unsigned Opcode = MI.getOpcode();
1375 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1376 "Must have t2STRDi8 or t2LDRDi8");
// Only a zero immediate offset can absorb the base update.
1377 if (MI.getOperand(3).getImm() != 0)
1380 // Behaviour for writeback is undefined if base register is the same as one
1382 const MachineOperand &BaseOp = MI.getOperand(2);
1383 unsigned Base = BaseOp.getReg();
1384 const MachineOperand &Reg0Op = MI.getOperand(0);
1385 const MachineOperand &Reg1Op = MI.getOperand(1);
1386 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1390 ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
1391 MachineBasicBlock::iterator MBBI(MI);
1392 MachineBasicBlock &MBB = *MI.getParent();
// Preceding add/sub of +/-8 -> pre-indexed; trailing one -> post-indexed.
1394 MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
1397 if (Offset == 8 || Offset == -8) {
1398 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1400 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1401 if (Offset == 8 || Offset == -8) {
1402 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1406 MBB.erase(MergeInstr);
1408 DebugLoc DL = MI.getDebugLoc();
1409 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
// Loads define the two data regs then the updated base; stores define the
// updated base first — operand order differs between the two forms.
1410 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1411 MIB.addOperand(Reg0Op).addOperand(Reg1Op)
1412 .addReg(BaseOp.getReg(), RegState::Define);
1414 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1415 MIB.addReg(BaseOp.getReg(), RegState::Define)
1416 .addOperand(Reg0Op).addOperand(Reg1Op);
1418 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1419 .addImm(Offset).addImm(Pred).addReg(PredReg);
1420 assert(TII->get(Opcode).getNumOperands() == 6 &&
1421 TII->get(NewOpc).getNumOperands() == 7 &&
1422 "Unexpected number of operands in Opcode specification.");
1424 // Transfer implicit operands.
1425 for (const MachineOperand &MO : MI.implicit_operands())
1427 MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
1433 /// Returns true if instruction is a memory operation that this pass is capable
1434 /// of operating on.
1435 static bool isMemoryOp(const MachineInstr *MI) {
1436 // When no memory operands are present, conservatively assume unaligned,
1437 // volatile, unfoldable.
1438 if (!MI->hasOneMemOperand())
1441 const MachineMemOperand *MMO = *MI->memoperands_begin();
1443 // Don't touch volatile memory accesses - we may be changing their order.
1444 if (MMO->isVolatile())
1447 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1449 if (MMO->getAlignment() < 4)
1452 // str <undef> could probably be eliminated entirely, but for now we just want
1453 // to avoid making a mess of it.
1454 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1455 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1456 MI->getOperand(0).isUndef())
1459 // Likewise don't mess with references to undefined addresses.
1460 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1461 MI->getOperand(1).isUndef())
// Finally, only immediate-offset register-base forms qualify (the case
// labels of the switch on Opcode are missing from this extract).
1464 unsigned Opcode = MI->getOpcode();
1469 return MI->getOperand(1).isReg();
1472 return MI->getOperand(1).isReg();
1483 return MI->getOperand(1).isReg();
// Emit a single LDR (isDef == true) or STR before MBBI with the given data
// register, base register, immediate offset, predicate, and kill/dead/undef
// flags. Used by FixInvalidRegPairOp when splitting an LDRD/STRD in two.
1488 static void InsertLDR_STR(MachineBasicBlock &MBB,
1489 MachineBasicBlock::iterator &MBBI,
1490 int Offset, bool isDef,
1491 DebugLoc DL, unsigned NewOpc,
1492 unsigned Reg, bool RegDeadKill, bool RegUndef,
1493 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1494 bool OffKill, bool OffUndef,
1495 ARMCC::CondCodes Pred, unsigned PredReg,
1496 const TargetInstrInfo *TII, bool isT2) {
// Load form: data register is a def (possibly dead).
1498 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1500 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1501 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1502 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: data register is a use (possibly killed/undef).
1504 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1506 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1507 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1508 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Rewrite LDRD/STRD instructions whose register pair is illegal (ARM-mode
// LDRD/STRD need an even/odd consecutive pair) or that trip Cortex-M3 errata
// 602117, replacing them with an LDM/STM or with two single loads/stores.
// Returns true if the instruction was rewritten (MBBI is updated).
1512 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1513 MachineBasicBlock::iterator &MBBI) {
1514 MachineInstr *MI = &*MBBI;
1515 unsigned Opcode = MI->getOpcode();
1516 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1519 const MachineOperand &BaseOp = MI->getOperand(2);
1520 unsigned BaseReg = BaseOp.getReg();
1521 unsigned EvenReg = MI->getOperand(0).getReg();
1522 unsigned OddReg = MI->getOperand(1).getReg();
// Dwarf register numbers are used to test even/odd adjacency of the pair.
1523 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1524 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1526 // ARM errata 602117: LDRD with base in list may result in incorrect base
1527 // register when interrupted or faulted.
1528 bool Errata602117 = EvenReg == BaseReg &&
1529 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1530 // ARM LDRD/STRD needs consecutive registers.
1531 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1532 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
// Nothing wrong with this instruction; leave it alone.
1534 if (!Errata602117 && !NonConsecutiveRegs)
1537 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1538 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the data regs are defs (dead flag matters); for stores they are
// uses (kill flag matters).
1539 bool EvenDeadKill = isLd ?
1540 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1541 bool EvenUndef = MI->getOperand(0).isUndef();
1542 bool OddDeadKill = isLd ?
1543 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1544 bool OddUndef = MI->getOperand(1).isUndef();
1545 bool BaseKill = BaseOp.isKill();
1546 bool BaseUndef = BaseOp.isUndef();
1547 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1548 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1549 int OffImm = getMemoryOpOffset(MI);
1550 unsigned PredReg = 0;
1551 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1553 if (OddRegNum > EvenRegNum && OffImm == 0) {
1554 // Ascending register numbers and no offset. It's safe to change it to a
1556 unsigned NewOpc = (isLd)
1557 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1558 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1560 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1561 .addReg(BaseReg, getKillRegState(BaseKill))
1562 .addImm(Pred).addReg(PredReg)
1563 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1564 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1567 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1568 .addReg(BaseReg, getKillRegState(BaseKill))
1569 .addImm(Pred).addReg(PredReg)
1571 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1573 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1577 // Split into two instructions.
1578 unsigned NewOpc = (isLd)
1579 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1580 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1581 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1582 // so adjust and use t2LDRi12 here for that.
1583 unsigned NewOpc2 = (isLd)
1584 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1585 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1586 DebugLoc dl = MBBI->getDebugLoc();
1587 // If this is a load and base register is killed, it may have been
1588 // re-defed by the load, make sure the first load does not clobber it.
1590 (BaseKill || OffKill) &&
1591 (TRI->regsOverlap(EvenReg, BaseReg))) {
1592 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Load the odd (non-overlapping) half first so the base stays valid.
1593 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1594 OddReg, OddDeadKill, false,
1595 BaseReg, false, BaseUndef, false, OffUndef,
1596 Pred, PredReg, TII, isT2);
1597 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1598 EvenReg, EvenDeadKill, false,
1599 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1600 Pred, PredReg, TII, isT2);
1602 if (OddReg == EvenReg && EvenDeadKill) {
1603 // If the two source operands are the same, the kill marker is
1604 // probably on the first one. e.g.
1605 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1606 EvenDeadKill = false;
1609 // Never kill the base register in the first instruction.
1610 if (EvenReg == BaseReg)
1611 EvenDeadKill = false;
1612 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1613 EvenReg, EvenDeadKill, EvenUndef,
1614 BaseReg, false, BaseUndef, false, OffUndef,
1615 Pred, PredReg, TII, isT2);
1616 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1617 OddReg, OddDeadKill, OddUndef,
1618 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1619 Pred, PredReg, TII, isT2);
// Remove the original LDRD/STRD and advance the caller's iterator.
1627 MBBI = MBB.erase(MBBI);
1631 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1632 /// incrementing offset into LDM / STM ops.
// Walks the block bottom-up collecting chains of same-base/same-opcode/
// same-predicate memory ops into MemOps, forms merge candidates from each
// chain, then performs the merges and folds base inc/dec updates.
1633 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1635 unsigned CurrBase = 0;
1636 unsigned CurrOpc = ~0u;
1637 unsigned CurrSize = 0;
1638 ARMCC::CondCodes CurrPred = ARMCC::AL;
1639 unsigned CurrPredReg = 0;
1640 unsigned Position = 0;
1641 assert(Candidates.size() == 0);
1642 assert(MergeBaseCandidates.size() == 0);
1643 LiveRegsValid = false;
// Iterate from the end of the block toward the beginning.
1645 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1647 // The instruction in front of the iterator is the one we look at.
1648 MBBI = std::prev(I);
1649 if (FixInvalidRegPairOp(MBB, MBBI))
1653 if (isMemoryOp(MBBI)) {
1654 unsigned Opcode = MBBI->getOpcode();
1655 unsigned Size = getLSMultipleTransferSize(MBBI);
1656 const MachineOperand &MO = MBBI->getOperand(0);
1657 unsigned Reg = MO.getReg();
1658 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
1659 unsigned PredReg = 0;
1660 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1661 int Offset = getMemoryOpOffset(MBBI);
1662 if (CurrBase == 0) {
1663 // Start of a new chain.
1668 CurrPredReg = PredReg;
1669 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1672 // Note: No need to match PredReg in the next if.
1673 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1675 // r4 := ldr [r0, #8]
1676 // r4 := ldr [r0, #4]
1679 // If a load overrides the base register or a register loaded by
1680 // another load in our chain, we cannot take this instruction.
1681 bool Overlap = false;
1682 if (isLoadSingle(Opcode)) {
1683 Overlap = (Base == Reg);
1685 for (const MemOpQueueEntry &E : MemOps) {
1686 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1695 // Check offset and sort memory operation into the current chain.
1696 if (Offset > MemOps.back().Offset) {
1697 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
// Otherwise find the insertion point that keeps MemOps offset-sorted;
// equal offsets mean two accesses collide, which aborts the chain.
1700 MemOpQueue::iterator MI, ME;
1701 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1702 if (Offset < MI->Offset) {
1703 // Found a place to insert.
1706 if (Offset == MI->Offset) {
1707 // Collision, abort.
1712 if (MI != MemOps.end()) {
1713 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
1720 // Don't advance the iterator; The op will start a new chain next.
1723 // Fallthrough to look into existing chain.
1724 } else if (MBBI->isDebugValue()) {
1726 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1727 MBBI->getOpcode() == ARM::t2STRDi8) {
1728 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1729 // remember them because we may still be able to merge add/sub into them.
1730 MergeBaseCandidates.push_back(MBBI);
1734 // If we are here then the chain is broken; Extract candidates for a merge.
1735 if (MemOps.size() > 0) {
1736 FormCandidates(MemOps);
1737 // Reset for the next chain.
1741 CurrPred = ARMCC::AL;
// Flush any chain still open at the top of the block.
1746 if (MemOps.size() > 0)
1747 FormCandidates(MemOps);
1749 // Sort candidates so they get processed from end to begin of the basic
1750 // block later; This is necessary for liveness calculation.
1751 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1752 return M0->InsertPos < M1->InsertPos;
1754 std::sort(Candidates.begin(), Candidates.end(), LessThan);
1756 // Go through list of candidates and merge.
1757 bool Changed = false;
1758 for (const MergeCandidate *Candidate : Candidates) {
1759 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1760 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1761 // Merge preceding/trailing base inc/dec into the merged op.
1764 unsigned Opcode = Merged->getOpcode();
1765 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
1766 MergeBaseUpdateLSDouble(*Merged);
1768 MergeBaseUpdateLSMultiple(Merged);
// Even when the multi-merge failed, each single op may still absorb
// a base update.
1770 for (MachineInstr *MI : Candidate->Instrs) {
1771 if (MergeBaseUpdateLoadStore(MI))
1776 assert(Candidate->Instrs.size() == 1);
1777 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1782 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
1783 for (MachineInstr *MI : MergeBaseCandidates)
1784 MergeBaseUpdateLSDouble(*MI);
1785 MergeBaseCandidates.clear();
1790 /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1791 /// into the preceding stack restore so it directly restore the value of LR
1793 /// ldmfd sp!, {..., lr}
1796 /// ldmfd sp!, {..., lr}
1799 /// ldmfd sp!, {..., pc}
1800 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1801 // Thumb1 LDM doesn't allow high registers.
1802 if (isThumb1) return false;
1803 if (MBB.empty()) return false;
1805 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
// Pattern: the block ends in a plain return and the instruction before it
// is a writeback pop (LDM*_UPD) whose last register is LR.
1806 if (MBBI != MBB.begin() &&
1807 (MBBI->getOpcode() == ARM::BX_RET ||
1808 MBBI->getOpcode() == ARM::tBX_RET ||
1809 MBBI->getOpcode() == ARM::MOVPCLR)) {
1810 MachineInstr *PrevMI = std::prev(MBBI);
1811 unsigned Opcode = PrevMI->getOpcode();
1812 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1813 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1814 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1815 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1816 if (MO.getReg() != ARM::LR)
// Retarget the pop to load PC instead of LR, making it the return.
1818 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1819 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1820 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1821 PrevMI->setDesc(TII->get(NewOpc));
// Keep implicit operands (e.g. from the erased return) on the new op.
1823 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
// Pass entry point: caches subtarget/register info, then runs the multiple-op
// merger (and, on v5T+, the return merger) over every basic block.
1831 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1833 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1834 TL = STI->getTargetLowering();
1835 AFI = Fn.getInfo<ARMFunctionInfo>();
1836 TII = STI->getInstrInfo();
1837 TRI = STI->getRegisterInfo();
1838 MRI = &Fn.getRegInfo();
1839 RegClassInfoValid = false;
1840 isThumb2 = AFI->isThumb2Function();
1841 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1843 bool Modified = false;
1844 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1846 MachineBasicBlock &MBB = *MFI;
1847 Modified |= LoadStoreMultipleOpti(MBB);
// LDM-into-PC returns require ARMv5T or later.
1848 if (STI->hasV5TOps())
1849 Modified |= MergeReturnIntoLDM(MBB);
// Release all MergeCandidate allocations made during this function.
1852 Allocator.DestroyAll();
1857 /// Pre- register allocation pass that move load / stores from consecutive
1858 /// locations close to make it more likely they will be combined later.
1859 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1861 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached per-function target info, set up in runOnMachineFunction.
1863 const DataLayout *TD;
1864 const TargetInstrInfo *TII;
1865 const TargetRegisterInfo *TRI;
1866 const ARMSubtarget *STI;
1867 MachineRegisterInfo *MRI;
1868 MachineFunction *MF;
1870 bool runOnMachineFunction(MachineFunction &Fn) override;
1872 const char *getPassName() const override {
1873 return "ARM pre- register allocation load / store optimization pass";
// Helper predicates/workers; see the definitions below for details.
1877 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1878 unsigned &NewOpc, unsigned &EvenReg,
1879 unsigned &OddReg, unsigned &BaseReg,
1881 unsigned &PredReg, ARMCC::CondCodes &Pred,
1883 bool RescheduleOps(MachineBasicBlock *MBB,
1884 SmallVectorImpl<MachineInstr *> &Ops,
1885 unsigned Base, bool isLd,
1886 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1887 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification.
1889 char ARMPreAllocLoadStoreOpt::ID = 0;
// Pre-RA pass entry point: caches target info, then reschedules load/store
// instructions in every basic block.
1892 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1893 TD = Fn.getTarget().getDataLayout();
1894 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1895 TII = STI->getInstrInfo();
1896 TRI = STI->getRegisterInfo();
1897 MRI = &Fn.getRegInfo();
1900 bool Modified = false;
1901 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1903 Modified |= RescheduleLoadStoreInstrs(MFI);
// Decide whether the memory ops in \p MemOps can be moved together across the
// instructions in [I, E) without changing behavior, and whether doing so is
// unlikely to blow up register pressure.
1908 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1909 MachineBasicBlock::iterator I,
1910 MachineBasicBlock::iterator E,
1911 SmallPtrSetImpl<MachineInstr*> &MemOps,
1912 SmallSet<unsigned, 4> &MemRegs,
1913 const TargetRegisterInfo *TRI) {
1914 // Are there stores / loads / calls between them?
1915 // FIXME: This is overly conservative. We should make use of alias information
1917 SmallSet<unsigned, 4> AddedRegPressure;
// Instructions already in the group being moved are ignored.
1919 if (I->isDebugValue() || MemOps.count(&*I))
// Anything with unknown side effects blocks the motion.
1921 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
// Moving loads past an intervening store could change what they read.
1923 if (isLd && I->mayStore())
1928 // It's not safe to move the first 'str' down.
1931 // str r4, [r0, #+4]
// Scan operands: a def of (anything overlapping) the base kills the idea;
// other registers contribute to the estimated pressure increase.
1935 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1936 MachineOperand &MO = I->getOperand(j);
1939 unsigned Reg = MO.getReg();
1940 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1942 if (Reg != Base && !MemRegs.count(Reg))
1943 AddedRegPressure.insert(Reg);
1947 // Estimate register pressure increase due to the transformation.
1948 if (MemRegs.size() <= 4)
1949 // Ok if we are moving small number of instructions.
1951 return AddedRegPressure.size() <= MemRegs.size() * 2;
1955 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
// Used when two loads/stores are fused into one LDRD/STRD: the combined
// instruction must carry both original memory operands so later passes keep
// accurate aliasing/volatility information.
1956 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1957 MachineInstr *Op1) {
1958 assert(MI->memoperands_empty() && "expected a new machineinstr");
1959 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1960 + (Op1->memoperands_end() - Op1->memoperands_begin());
// Memoperand arrays are allocated from the function's allocator.
1962 MachineFunction *MF = MI->getParent()->getParent();
1963 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1964 MachineSDNode::mmo_iterator MemEnd =
1965 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1967 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1968 MI->setMemRefs(MemBegin, MemEnd);
// Check whether the pair of single loads/stores Op0/Op1 can legally be fused
// into one LDRD/STRD: target support, suitable opcodes, sufficient alignment,
// an encodable offset, and distinct data registers. On success fills in the
// new opcode, data/base registers, encoded offset, predicate, and debug loc.
1972 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1973 DebugLoc &dl, unsigned &NewOpc,
1975 unsigned &SecondReg,
1976 unsigned &BaseReg, int &Offset,
1978 ARMCC::CondCodes &Pred,
1980 // Make sure we're allowed to generate LDRD/STRD.
1981 if (!STI->hasV5TEOps())
1984 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1986 unsigned Opcode = Op0->getOpcode();
1987 if (Opcode == ARM::LDRi12) {
1989 } else if (Opcode == ARM::STRi12) {
1991 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1992 NewOpc = ARM::t2LDRDi8;
1995 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1996 NewOpc = ARM::t2STRDi8;
2003 // Make sure the base address satisfies i64 ld / st alignment requirement.
2004 // At the moment, we ignore the memoryoperand's value.
2005 // If we want to use AliasAnalysis, we should check it accordingly.
2006 if (!Op0->hasOneMemOperand() ||
2007 (*Op0->memoperands_begin())->isVolatile())
2010 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
2011 const Function *Func = MF->getFunction();
2012 unsigned ReqAlign = STI->hasV6Ops()
2013 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
2014 : 8; // Pre-v6 need 8-byte align
2015 if (Align < ReqAlign)
2018 // Then make sure the immediate offset fits.
2019 int OffImm = getMemoryOpOffset(Op0);
// Thumb2 path: signed 8-bit scaled range, symmetric about zero.
2021 int Limit = (1 << 8) * Scale;
2022 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// ARM path: addrmode3 encodes magnitude plus an add/sub bit.
2026 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2028 AddSub = ARM_AM::sub;
2031 int Limit = (1 << 8) * Scale;
2032 if (OffImm >= Limit || (OffImm & (Scale-1)))
2034 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2036 FirstReg = Op0->getOperand(0).getReg();
2037 SecondReg = Op1->getOperand(0).getReg();
// LDRD/STRD need two distinct data registers.
2038 if (FirstReg == SecondReg)
2040 BaseReg = Op0->getOperand(1).getReg();
2041 Pred = getInstrPredicate(Op0, PredReg);
2042 dl = Op0->getDebugLoc();
// Given the loads or stores in \p Ops that share base register \p Base, move
// runs with consecutive offsets next to each other (loads up to the first,
// stores down to the last) and, for pairs, try to fuse them into LDRD/STRD.
// \p MI2LocMap gives each instruction's original position in the block.
// Returns true if anything was moved or fused.
2046 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2047 SmallVectorImpl<MachineInstr *> &Ops,
2048 unsigned Base, bool isLd,
2049 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2050 bool RetVal = false;
2052 // Sort by offset (in reverse order).
2053 std::sort(Ops.begin(), Ops.end(),
2054 [](const MachineInstr *LHS, const MachineInstr *RHS) {
2055 int LOffset = getMemoryOpOffset(LHS);
2056 int ROffset = getMemoryOpOffset(RHS);
2057 assert(LHS == RHS || LOffset != ROffset);
2058 return LOffset > ROffset;
2061 // The loads / stores of the same base are in order. Scan them from first to
2062 // last and check for the following:
2063 // 1. Any def of base.
2065 while (Ops.size() > 1) {
2066 unsigned FirstLoc = ~0U;
2067 unsigned LastLoc = 0;
2068 MachineInstr *FirstOp = nullptr;
2069 MachineInstr *LastOp = nullptr;
2071 unsigned LastOpcode = 0;
2072 unsigned LastBytes = 0;
2073 unsigned NumMove = 0;
// Walk Ops from the back (lowest offset first after the reverse sort),
// growing a run of same-opcode ops with strictly consecutive offsets.
2074 for (int i = Ops.size() - 1; i >= 0; --i) {
2075 MachineInstr *Op = Ops[i];
2076 unsigned Loc = MI2LocMap[Op];
2077 if (Loc <= FirstLoc) {
2081 if (Loc >= LastLoc) {
2087 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2088 if (LastOpcode && LSMOpcode != LastOpcode)
2091 int Offset = getMemoryOpOffset(Op);
2092 unsigned Bytes = getLSMultipleTransferSize(Op);
// Run is broken if the next op isn't exactly adjacent in memory.
2094 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2097 LastOffset = Offset;
2099 LastOpcode = LSMOpcode;
2100 if (++NumMove == 8) // FIXME: Tune this limit.
2107 SmallPtrSet<MachineInstr*, 4> MemOps;
2108 SmallSet<unsigned, 4> MemRegs;
2109 for (int i = NumMove-1; i >= 0; --i) {
2110 MemOps.insert(Ops[i]);
2111 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2114 // Be conservative, if the instructions are too far apart, don't
2115 // move them. We want to limit the increase of register pressure.
2116 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2118 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2119 MemOps, MemRegs, TRI);
// Not movable: drop the whole run from Ops and try the next one.
2121 for (unsigned i = 0; i != NumMove; ++i)
2124 // This is the new location for the loads / stores.
2125 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2126 while (InsertPos != MBB->end()
2127 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2130 // If we are moving a pair of loads / stores, see if it makes sense
2131 // to try to allocate a pair of registers that can form register pairs.
2132 MachineInstr *Op0 = Ops.back();
2133 MachineInstr *Op1 = Ops[Ops.size()-2];
2134 unsigned FirstReg = 0, SecondReg = 0;
2135 unsigned BaseReg = 0, PredReg = 0;
2136 ARMCC::CondCodes Pred = ARMCC::AL;
2138 unsigned NewOpc = 0;
2141 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2142 FirstReg, SecondReg, BaseReg,
2143 Offset, PredReg, Pred, isT2)) {
// Constrain both virtual data registers to the class LDRD/STRD expects.
2147 const MCInstrDesc &MCID = TII->get(NewOpc);
2148 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2149 MRI->constrainRegClass(FirstReg, TRC);
2150 MRI->constrainRegClass(SecondReg, TRC);
2152 // Form the pair instruction.
2154 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2155 .addReg(FirstReg, RegState::Define)
2156 .addReg(SecondReg, RegState::Define)
2158 // FIXME: We're converting from LDRi12 to an insn that still
2159 // uses addrmode2, so we need an explicit offset reg. It should
2160 // always by reg0 since we're transforming LDRi12s.
2163 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2164 concatenateMemOperands(MIB, Op0, Op1);
2165 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2168 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2172 // FIXME: We're converting from LDRi12 to an insn that still
2173 // uses addrmode2, so we need an explicit offset reg. It should
2174 // always by reg0 since we're transforming STRi12s.
2177 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2178 concatenateMemOperands(MIB, Op0, Op1);
2179 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2186 // Add register allocation hints to form register pairs.
2187 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2188 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
// No fusion: just splice the run's instructions to the insertion point.
2191 for (unsigned i = 0; i != NumMove; ++i) {
2192 MachineInstr *Op = Ops.back();
2194 MBB->splice(InsertPos, MBB, Op);
2198 NumLdStMoved += NumMove;
// Scan a basic block, grouping single (unpredicated) loads and stores by their
// base register, then invoke RescheduleOps on each group so adjacent accesses
// can be moved together and paired. Returns true if anything was changed.
// NOTE(review): the return-type line and several body lines are elided in this
// excerpt (see the gaps in the embedded line numbers); comments describe only
// what is visible.
2208 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2209 bool RetVal = false;
// MI2LocMap: program-order index per instruction (used by RescheduleOps to
// measure how far apart candidate ops are).
2211 DenseMap<MachineInstr*, unsigned> MI2LocMap;
// Per-base-register lists of candidate loads/stores, plus the bases in the
// order they were first seen.
2212 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2213 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2214 SmallVector<unsigned, 4> LdBases;
2215 SmallVector<unsigned, 4> StBases;
2218 MachineBasicBlock::iterator MBBI = MBB->begin();
2219 MachineBasicBlock::iterator E = MBB->end();
2221 for (; MBBI != E; ++MBBI) {
2222 MachineInstr *MI = MBBI;
// Calls and terminators end the current scheduling region (the handling
// body is elided in this excerpt).
2223 if (MI->isCall() || MI->isTerminator()) {
2224 // Stop at barriers.
// Debug values get no location index so they don't perturb distances.
2229 if (!MI->isDebugValue())
2230 MI2LocMap[MI] = ++Loc;
// Only simple memory ops are candidates...
2232 if (!isMemoryOp(MI))
// ...and only unconditional ones.
2234 unsigned PredReg = 0;
2235 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2238 int Opc = MI->getOpcode();
2239 bool isLd = isLoadSingle(Opc);
// Operand 1 is the base register of these single load/store forms.
2240 unsigned Base = MI->getOperand(1).getReg();
2241 int Offset = getMemoryOpOffset(MI);
2243 bool StopHere = false;
// Load path: append MI to the existing list for Base, or start a new one.
2245 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2246 Base2LdsMap.find(Base);
2247 if (BI != Base2LdsMap.end()) {
2248 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
// A repeated base+offset presumably sets StopHere (action elided in this
// excerpt — confirm against full source).
2249 if (Offset == getMemoryOpOffset(BI->second[i])) {
2255 BI->second.push_back(MI);
2257 Base2LdsMap[Base].push_back(MI);
2258 LdBases.push_back(Base);
// Store path: mirrors the load path above.
2261 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2262 Base2StsMap.find(Base);
2263 if (BI != Base2StsMap.end()) {
2264 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2265 if (Offset == getMemoryOpOffset(BI->second[i])) {
2271 BI->second.push_back(MI);
2273 Base2StsMap[Base].push_back(MI);
2274 StBases.push_back(Base);
2279 // Found a duplicate (a base+offset combination that's seen earlier).
2286 // Re-schedule loads.
2287 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2288 unsigned Base = LdBases[i];
2289 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
// isLd = true: gather the loads for this base.
2291 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2294 // Re-schedule stores.
2295 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2296 unsigned Base = StBases[i];
2297 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
// isLd = false: gather the stores for this base.
2299 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset per-region state (presumably so the scan can continue past a
// call/terminator barrier — surrounding control flow is elided here).
2303 Base2LdsMap.clear();
2304 Base2StsMap.clear();
2314 /// Returns an instance of the load / store optimization pass.
/// \param PreAlloc selects the pre-register-allocation variant
///        (ARMPreAllocLoadStoreOpt) over the post-RA one (ARMLoadStoreOpt).
/// NOTE(review): the `if (PreAlloc)` guard line and closing brace fall in a
/// gap of this excerpt; the two returns below are the branch bodies.
2315 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2317 return new ARMPreAllocLoadStoreOpt();
2318 return new ARMLoadStoreOpt();