1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterClassInfo.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/LivePhysRegs.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetRegisterInfo.h"
49 #define DEBUG_TYPE "arm-ldst-opt"
// Pass-wide statistics counters, printed when LLVM is run with -stats.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// Post-register-allocation pass that combines load / store instructions to
/// form ldm / stm instructions.
/// NOTE(review): this extraction is missing lines throughout the struct
/// (e.g. the `static char ID;` member referenced by the constructor, the
/// closing brace of getPassName(), and the structs' closing braces) --
/// restore from upstream before building.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  // Cached per-function state, set up in runOnMachineFunction.
  const MachineFunction *MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const MachineRegisterInfo *MRI;
  const ARMSubtarget *STI;
  const TargetLowering *TL;

  // Lazily-computed liveness state used by findFreeReg/moveLiveRegsBefore.
  LivePhysRegs LiveRegs;
  RegisterClassInfo RegClassInfo;
  MachineBasicBlock::const_iterator LiveRegPos;

  bool RegClassInfoValid;
  // Which Thumb flavor (if any) the current function is compiled for.
  bool isThumb1, isThumb2;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "ARM load / store optimization pass";

  /// A set of load/store MachineInstrs with same base register sorted by
  // NOTE(review): the `MachineInstr *MI;` member initialized by the
  // constructor below is missing from this extraction.
  struct MemOpQueueEntry {
    int Offset; ///< Load/Store offset.
    unsigned Position; ///< Position as counted from end of basic block.
    MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
      : MI(MI), Offset(Offset), Position(Position) {}
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

  /// A set of MachineInstrs that fulfill (nearly all) conditions to get
  /// merged into a LDM/STM.
  struct MergeCandidate {
    /// List of instructions ordered by load/store offset.
    SmallVector<MachineInstr*, 4> Instrs;
    /// Index in Instrs of the instruction being latest in the schedule.
    unsigned LatestMIIdx;
    /// Index in Instrs of the instruction being earliest in the schedule.
    unsigned EarliestMIIdx;
    /// Index into the basic block where the merged instruction will be
    /// inserted. (See MemOpQueueEntry.Position)
    // NOTE(review): the `unsigned InsertPos;` member documented above
    // (assigned in FormCandidates) is missing from this extraction.
    /// Whether the instructions can be merged into a ldm/stm instruction.
    bool CanMergeToLSMulti;
    /// Whether the instructions can be merged into a ldrd/strd instruction.
    bool CanMergeToLSDouble;
  // Arena that owns the MergeCandidate objects collected below.
  BumpPtrAllocator Allocator;
  SmallVector<const MergeCandidate*,4> Candidates;
  SmallVector<MachineInstr*,4> MergeBaseCandidates;

  void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                          MachineBasicBlock::const_iterator Before);
  unsigned findFreeReg(const TargetRegisterClass &RegClass);
  void UpdateBaseRegUses(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         DebugLoc DL, unsigned Base, unsigned WordOffset,
                         ARMCC::CondCodes Pred, unsigned PredReg);
  MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
      MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
      bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
      DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
  MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
      MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
      bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
      DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
  void FormCandidates(const MemOpQueue &MemOps);
  MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineInstr *MI);
  bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
  bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
148 char ARMLoadStoreOpt::ID = 0;
151 static bool definesCPSR(const MachineInstr *MI) {
152 for (const auto &MO : MI->operands()) {
155 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
156 // If the instruction has live CPSR def, then it's not safe to fold it
157 // into load / store.
164 static int getMemoryOpOffset(const MachineInstr *MI) {
165 unsigned Opcode = MI->getOpcode();
166 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
167 unsigned NumOperands = MI->getDesc().getNumOperands();
168 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
170 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
171 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
172 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
173 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
176 // Thumb1 immediate offsets are scaled by 4
177 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
178 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
181 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
182 : ARM_AM::getAM5Offset(OffField) * 4;
183 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
184 : ARM_AM::getAM5Op(OffField);
186 if (Op == ARM_AM::sub)
192 static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
193 return MI.getOperand(1);
196 static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
197 return MI.getOperand(0);
/// Map a single load/store opcode plus an addressing submode to the
/// corresponding load/store-multiple opcode; returns 0 where no non-writeback
/// multiple exists for the submode.
/// NOTE(review): the outer `switch (Opcode)` header, its `case` labels, the
/// statistics increments and all closing braces were lost in this
/// extraction -- only the per-submode tables below survive. Restore the
/// scaffolding from upstream before building.
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
  // tLDMIA is writeback-only - unless the base register is in the input
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
  // There is no non-writeback tSTMIA either.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
/// Return the addressing submode (ia/ib/da/db) used by the given
/// load/store-multiple opcode.
/// NOTE(review): the `switch` header, the interleaved non-UPD case labels and
/// the `return ARM_AM::...;` lines were lost in this extraction; only the
/// case-label groups survive. Restore from upstream before building.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
339 static bool isT1i32Load(unsigned Opc) {
340 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
343 static bool isT2i32Load(unsigned Opc) {
344 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
347 static bool isi32Load(unsigned Opc) {
348 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
351 static bool isT1i32Store(unsigned Opc) {
352 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
355 static bool isT2i32Store(unsigned Opc) {
356 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
359 static bool isi32Store(unsigned Opc) {
360 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
363 static bool isLoadSingle(unsigned Opc) {
364 return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
/// Scale factor applied to the immediate offset of Thumb1 load/store opcode
/// \p Opc (per the comment in UpdateBaseRegUses, offsets are scaled by 1, 2
/// or 4 depending on the opcode).
/// NOTE(review): the `switch (Opc)` header, case labels and returns were
/// lost in this extraction; restore from upstream before building.
static unsigned getImmScale(unsigned Opc) {
  default: llvm_unreachable("Unhandled opcode!");
/// Number of bytes transferred by load/store(-multiple) instruction \p MI.
/// For multiples, the register-list length is the operand count beyond the
/// fixed operands; integer lists move 4 bytes per register, VFP double lists
/// move 8.
/// NOTE(review): most case labels, the single-load/store returns and the
/// closing braces were lost in this extraction.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
/// Walks forward from \p MBBI rewriting immediate offsets of loads/stores
/// that use \p Base, folding the adjustment into a following SUBS/ADDS where
/// possible, and otherwise inserting a tSUBi8 to reset the base.
/// NOTE(review): this extraction lost the `void` return-type line, the
/// `unsigned WordOffset` parameter, several declarations
/// (IsLoad/IsStore/MO/Offset), the InsertSub assignments and various braces.
/// Restore from upstream before building.
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
          Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
          Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if

          // The base register has now been reset, so exit early.

        // Can't update the instruction.

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.

      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    if (MBBI != MBB.end()) --MBBI;
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
524 /// Return the first register of class \p RegClass that is not in \p Regs.
525 unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
526 if (!RegClassInfoValid) {
527 RegClassInfo.runOnMachineFunction(*MF);
528 RegClassInfoValid = true;
531 for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
532 if (!LiveRegs.contains(Reg))
/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
/// NOTE(review): this extraction lost the LiveRegs initialization line inside
/// the `if`, the closing braces, and the iterator decrement inside the while
/// loop; it also references a `LiveRegsValid` member that is not visible in
/// the struct as extracted. Restore from upstream before building.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    LiveRegs.stepBackward(*LiveRegPos);
556 static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
558 for (const std::pair<unsigned, bool> &R : Regs)
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// true if the transformation is done.
/// NOTE(review): many lines were lost in this extraction (Mode assignments in
/// the if/else chain, NewBase/BaseOpc declarations, several statements and
/// closing braces). The visible code is kept verbatim; restore the rest from
/// upstream before building.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
    } else if (Opcode == ARM::tSTRi) {

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  // Pick the submode from the starting offset relative to Base.
  if (Offset == 4 && haveIBAndDA) {
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)

    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);

      // Choose the add opcode for the target/immediate-range combination.
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;
      // Negative offsets use the matching subtract opcode instead.
      isThumb2 ? ARM::t2SUBri :
      (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
      isThumb1 ? ARM::tSUBi8 : ARM::SUBri;

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.

      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg).addReg(0);

      BaseKill = true; // New base is always killed straight away.

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)

  MachineInstrBuilder MIB;

    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
759 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
760 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
761 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
762 DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
763 bool IsLoad = isi32Load(Opcode);
764 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
765 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
767 assert(Regs.size() == 2);
768 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
769 TII->get(LoadStoreOpcode));
771 MIB.addReg(Regs[0].first, RegState::Define)
772 .addReg(Regs[1].first, RegState::Define);
774 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
775 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
777 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
778 return MIB.getInstr();
/// Call MergeOps and update MemOps and merges accordingly on success.
/// Collects the (register, kill) pairs and implicit super-register defs of
/// the candidate, builds the merged LDM/STM (or LDRD/STRD), erases the
/// originals, and fixes up kill flags / implicit defs in the affected range.
/// NOTE(review): many lines were lost in this extraction (the KilledRegs
/// guard, `continue` statements, instruction erasure, the `if (!Merged)`
/// early return and several braces). The visible code is kept verbatim;
/// restore the rest from upstream before building.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    // Collect any implicit defs of super-registers, after merging we can't
    // be sure anymore that we properly preserved these live ranges and must
    // removed these implicit operands.
    for (const MachineOperand &MO : MI->implicit_operands()) {
      if (!MO.isReg() || !MO.isDef() || MO.isDead())
      assert(MO.isImplicit());
      unsigned DefReg = MO.getReg();

      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
      // We can ignore cases where the super-reg is read and written.
      if (MI->readsRegister(DefReg))
      ImpDefs.push_back(DefReg);

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  // Prefer the double form; fall back to the multiple form.
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidated it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
    EarliestI = std::prev(EarliestI);

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)

  // Determine range between the earliest removed instruction and the new one.
    EarliestI = MBB.begin();
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);

    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
        if (UsedRegs.count(MO.getReg()))

    assert(ImpDefs.empty());
/// Return true if \p Offset is encodable as the immediate of a
/// t2LDRDi8/t2STRDi8: a multiple of 4 whose magnitude is below 1024.
static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // scaled by 4, so the usable byte range is [-1020, 1020] in steps of 4.
  return (Value % 4) == 0 && Value < 1024;
}
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
/// Scans runs of same-base memops with consecutive offsets and records each
/// run as a MergeCandidate (possibly mergeable to LDM/STM and/or LDRD/STRD).
/// NOTE(review): this extraction lost the `SIndex`/`Count` declarations, the
/// `do {` loop header, several `break`/assignment lines and braces. In
/// particular the bare `CanMergeToLSMulti = CanMergeToLSDouble = false;`
/// before the Candidate assignments appears to have lost its
/// `if (Count == 1)` guard -- verify against upstream.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);
  unsigned EIndex = MemOps.size();
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;

    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On swift vldm/vstm starting with an odd register number as that needs
    // more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);

      // See if the current load/store may be part of a multi load/store.
      bool PartOfLSMulti = CanMergeToLSMulti;
        // Cannot load from SP
        PartOfLSMulti = false;
      // Register numbers must be in ascending order.
      else if (RegNum <= PRegNum)
        PartOfLSMulti = false;
      // For VFP / NEON load/store multiples, the registers must be
      // consecutive and within the limit on the number of registers per
      else if (!isNotVFP && RegNum != PRegNum+1)
        PartOfLSMulti = false;

      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
      else if (Position > MemOps[Earliest].Position)
      // Prepare for next MemOp.

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
  } while (SIndex < EIndex);
/// Map a non-writeback load/store-multiple opcode plus an addressing submode
/// to its base-register-updating (_UPD) counterpart.
/// NOTE(review): the outer `switch (Opc)` header, its case labels and all
/// closing braces were lost in this extraction -- only the per-submode
/// tables survive. Restore the scaffolding from upstream before building.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
/// generated by the instruction are possibly read as well.
/// NOTE(review): this extraction lost the `Scale`/`CheckCPSRDef`/`MIPredReg`
/// declarations, the t2ADDri/t2SUBri case labels, the `default: return 0;`
/// of the switch, the early `return 0;` statements and the closing braces.
/// Restore from upstream before building.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  switch (MI.getOpcode()) {
  case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
  case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
  case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
  case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;

  // Must be an unpredicated (or identically predicated) two-address
  // add/sub of Reg with itself.
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(&MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)

  if (CheckCPSRDef && definesCPSR(&MI))
  return MI.getOperand(2).getImm() * Scale;
1096 /// Searches for an increment or decrement of \p Reg before \p MBBI.
1096b/// On success returns an iterator to the add/sub instruction and sets
1096c/// \p Offset to its signed byte amount; otherwise returns MBB.end().
1097 static MachineBasicBlock::iterator
1098 findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
1099 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1101 MachineBasicBlock &MBB = *MBBI->getParent();
1102 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1103 MachineBasicBlock::iterator EndMBBI = MBB.end();
// Nothing precedes the first instruction of the block.
1104 if (MBBI == BeginMBBI)
1107 // Skip debug values.
1108 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1109 while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
1112 Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
// Offset == 0 means "no usable inc/dec found"; signal failure via EndMBBI.
1113 return Offset == 0 ? EndMBBI : PrevMBBI;
1116 /// Searches for an increment or decrement of \p Reg after \p MBBI.
1116b/// On success returns an iterator to the add/sub instruction and sets
1116c/// \p Offset to its signed byte amount; otherwise returns MBB.end().
1117 static MachineBasicBlock::iterator
1118 findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
1119 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1121 MachineBasicBlock &MBB = *MBBI->getParent();
1122 MachineBasicBlock::iterator EndMBBI = MBB.end();
1123 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1124 // Skip debug values.
1125 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1127 if (NextMBBI == EndMBBI)
1130 Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
// As in findIncDecBefore: a zero offset means no match.
1131 return Offset == 0 ? EndMBBI : NextMBBI;
1134 /// Fold proceeding/trailing inc/dec of base register into the
1135 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1137 /// stmia rn, <ra, rb, rc>
1138 /// rn := rn + 4 * 3;
1140 /// stmia rn!, <ra, rb, rc>
1142 /// rn := rn - 4 * 3;
1143 /// ldmia rn, <ra, rb, rc>
1145 /// ldmdb rn!, <ra, rb, rc>
1146 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
1147 // Thumb1 is already using updating loads/stores.
1148 if (isThumb1) return false;
1150 const MachineOperand &BaseOP = MI->getOperand(0);
1151 unsigned Base = BaseOP.getReg();
1152 bool BaseKill = BaseOP.isKill();
1153 unsigned PredReg = 0;
1154 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1155 unsigned Opcode = MI->getOpcode();
1156 DebugLoc DL = MI->getDebugLoc();
1158 // Can't use an updating ld/st if the base register is also a dest
1159 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1160 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1161 if (MI->getOperand(i).getReg() == Base)
// First look for a pre-decrement/increment of the base before MI; a
// decrement of exactly the transfer size can fold into a DB-form update.
1164 int Bytes = getLSMultipleTransferSize(MI);
1165 MachineBasicBlock &MBB = *MI->getParent();
1166 MachineBasicBlock::iterator MBBI(MI);
1168 MachineBasicBlock::iterator MergeInstr
1169 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1170 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
1171 if (Mode == ARM_AM::ia && Offset == -Bytes) {
1173 } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
// Otherwise look for a trailing add/sub after MI that matches the
// transfer size for the current submode.
1176 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1177 if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
1178 ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
// Remove the now-folded add/sub and rebuild MI as the updating form.
1181 MBB.erase(MergeInstr);
1183 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1184 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1185 .addReg(Base, getDefRegState(true)) // WB base register
1186 .addReg(Base, getKillRegState(BaseKill))
1187 .addImm(Pred).addReg(PredReg);
1189 // Transfer the rest of operands.
1190 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1191 MIB.addOperand(MI->getOperand(OpNum));
1193 // Transfer memoperands.
1194 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// Map a single load/store opcode to its pre-indexed (writeback) counterpart
// for the given add/sub direction.
// NOTE(review): the switch(Opc)/case labels are elided in this excerpt;
// only the return statements are visible.
1200 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1201 ARM_AM::AddrOpc Mode) {
1204 return ARM::LDR_PRE_IMM;
1206 return ARM::STR_PRE_IMM;
// VLDR/VSTR have no indexed forms; the updating load/store-multiple opcodes
// are used instead (with a single register).
1208 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1210 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1212 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1214 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1217 return ARM::t2LDR_PRE;
1220 return ARM::t2STR_PRE;
1221 default: llvm_unreachable("Unhandled opcode!");
// Map a single load/store opcode to its post-indexed (writeback) counterpart
// for the given add/sub direction.
// NOTE(review): the switch(Opc)/case labels are elided in this excerpt;
// only the return statements are visible.
1225 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1226 ARM_AM::AddrOpc Mode) {
1229 return ARM::LDR_POST_IMM;
1231 return ARM::STR_POST_IMM;
// VLDR/VSTR: same fallback to updating load/store-multiple as the
// pre-indexed mapping above.
1233 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1235 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1237 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1239 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1242 return ARM::t2LDR_POST;
1245 return ARM::t2STR_POST;
1246 default: llvm_unreachable("Unhandled opcode!");
1250 /// Fold proceeding/trailing inc/dec of base register into the
1251 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
1250b/// Returns true if a preceding or trailing add/sub of the base register was
1250c/// folded into a pre-/post-indexed form of the single load/store.
1252 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
1253 // Thumb1 doesn't have updating LDR/STR.
1254 // FIXME: Use LDM/STM with single register instead.
1255 if (isThumb1) return false;
1257 unsigned Base = getLoadStoreBaseOp(*MI).getReg();
1258 bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
1259 unsigned Opcode = MI->getOpcode();
1260 DebugLoc DL = MI->getDebugLoc();
// isAM5: VFP loads/stores (addressing mode 5); isAM2: ARM-mode i32
// loads/stores (addressing mode 2).
1261 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1262 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1263 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset forms can absorb a base update.
1264 if (isi32Load(Opcode) || isi32Store(Opcode))
1265 if (MI->getOperand(2).getImm() != 0)
1267 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1270 // Can't do the merge if the destination register is the same as the would-be
1271 // writeback register.
1272 if (MI->getOperand(0).getReg() == Base)
1275 unsigned PredReg = 0;
1276 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1277 int Bytes = getLSMultipleTransferSize(MI);
1278 MachineBasicBlock &MBB = *MI->getParent();
1279 MachineBasicBlock::iterator MBBI(MI);
// Prefer folding a preceding add/sub (pre-indexed); fall back to a
// trailing one (post-indexed). AM5 (VFP) has no pre-indexed form for an
// incrementing update, hence the !isAM5 guards.
1281 MachineBasicBlock::iterator MergeInstr
1282 = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
1284 if (!isAM5 && Offset == Bytes) {
1285 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1286 } else if (Offset == -Bytes) {
1287 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
1289 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1290 if (Offset == Bytes) {
1291 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
1292 } else if (!isAM5 && Offset == -Bytes) {
1293 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
// The add/sub is folded into the new instruction; remove it.
1297 MBB.erase(MergeInstr);
1299 ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
1301 bool isLd = isLoadSingle(Opcode);
1303 // VLDM[SD]_UPD, VSTM[SD]_UPD
1304 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1305 // updating load/store-multiple instructions can be used with only one
1307 MachineOperand &MO = MI->getOperand(0);
1308 BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
1309 .addReg(Base, getDefRegState(true)) // WB base register
1310 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1311 .addImm(Pred).addReg(PredReg)
1312 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1313 getKillRegState(MO.isKill())));
1316 // LDR_PRE, LDR_POST
1317 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1318 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1319 .addReg(Base, RegState::Define)
1320 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// AM2 post-indexed forms encode direction+amount in a single immediate.
1322 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1323 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1324 .addReg(Base, RegState::Define)
1325 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1328 // t2LDR_PRE, t2LDR_POST
1329 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
1330 .addReg(Base, RegState::Define)
1331 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1334 MachineOperand &MO = MI->getOperand(0);
1335 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1336 // the vestigal zero-reg offset register. When that's fixed, this clause
1337 // can be removed entirely.
1338 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1339 int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1340 // STR_PRE, STR_POST
1341 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1342 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1343 .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
1345 // t2STR_PRE, t2STR_POST
1346 BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
1347 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1348 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Fold a preceding/trailing +/-8 update of the base register into a
// t2LDRDi8/t2STRDi8, forming the pre-/post-indexed t2LDRD/t2STRD variants.
// Returns true if a merge was performed.
1356 bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
1357 unsigned Opcode = MI.getOpcode();
1358 assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
1359 "Must have t2STRDi8 or t2LDRDi8");
// Only the zero-offset form can absorb a base update.
1360 if (MI.getOperand(3).getImm() != 0)
1363 // Behaviour for writeback is undefined if base register is the same as one
1365 const MachineOperand &BaseOp = MI.getOperand(2);
1366 unsigned Base = BaseOp.getReg();
1367 const MachineOperand &Reg0Op = MI.getOperand(0);
1368 const MachineOperand &Reg1Op = MI.getOperand(1);
1369 if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
1373 ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
1374 MachineBasicBlock::iterator MBBI(MI);
1375 MachineBasicBlock &MBB = *MI.getParent();
// Look for a +/-8 (the LDRD/STRD transfer size) add/sub before MI
// (pre-indexed), then after it (post-indexed).
1377 MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
1380 if (Offset == 8 || Offset == -8) {
1381 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
1383 MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
1384 if (Offset == 8 || Offset == -8) {
1385 NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
1389 MBB.erase(MergeInstr);
1391 DebugLoc DL = MI.getDebugLoc();
1392 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
// Loads define the writeback base after the two destination registers;
// stores define it first.
1393 if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
1394 MIB.addOperand(Reg0Op).addOperand(Reg1Op)
1395 .addReg(BaseOp.getReg(), RegState::Define);
1397 assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
1398 MIB.addReg(BaseOp.getReg(), RegState::Define)
1399 .addOperand(Reg0Op).addOperand(Reg1Op);
1401 MIB.addReg(BaseOp.getReg(), RegState::Kill)
1402 .addImm(Offset).addImm(Pred).addReg(PredReg);
1403 assert(TII->get(Opcode).getNumOperands() == 6 &&
1404 TII->get(NewOpc).getNumOperands() == 7 &&
1405 "Unexpected number of operands in Opcode specification.");
1407 // Transfer implicit operands.
1408 for (const MachineOperand &MO : MI.implicit_operands())
1410 MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
1416 /// Returns true if instruction is a memory operation that this pass is capable
1417 /// of operating on.
1418 static bool isMemoryOp(const MachineInstr *MI) {
1419 // When no memory operands are present, conservatively assume unaligned,
1420 // volatile, unfoldable.
1421 if (!MI->hasOneMemOperand())
1424 const MachineMemOperand *MMO = *MI->memoperands_begin();
1426 // Don't touch volatile memory accesses - we may be changing their order.
1427 if (MMO->isVolatile())
1430 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1432 if (MMO->getAlignment() < 4)
1435 // str <undef> could probably be eliminated entirely, but for now we just want
1436 // to avoid making a mess of it.
1437 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1438 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1439 MI->getOperand(0).isUndef())
1442 // Likewise don't mess with references to undefined addresses.
1443 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1444 MI->getOperand(1).isUndef())
// Finally, accept only the opcodes this pass understands whose base
// operand is a plain register (not e.g. a frame index).
// NOTE(review): the switch/case opcode labels are elided in this excerpt.
1447 unsigned Opcode = MI->getOpcode();
1452 return MI->getOperand(1).isReg();
1455 return MI->getOperand(1).isReg();
1466 return MI->getOperand(1).isReg();
// Emit a single immediate-offset LDR (isDef == true) or STR at MBBI with the
// given predicate and register kill/undef/dead flags. Used by
// FixInvalidRegPairOp when splitting an LDRD/STRD into two instructions.
1471 static void InsertLDR_STR(MachineBasicBlock &MBB,
1472 MachineBasicBlock::iterator &MBBI,
1473 int Offset, bool isDef,
1474 DebugLoc DL, unsigned NewOpc,
1475 unsigned Reg, bool RegDeadKill, bool RegUndef,
1476 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1477 bool OffKill, bool OffUndef,
1478 ARMCC::CondCodes Pred, unsigned PredReg,
1479 const TargetInstrInfo *TII, bool isT2) {
// Load: Reg is a def (possibly dead).
1481 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1483 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1484 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1485 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store: Reg is a use (possibly killed or undef).
1487 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1489 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1490 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1491 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Rewrite LDRD/STRD instructions that use an invalid register pair (or hit
// the Cortex-M3 LDRD erratum) into an LDM/STM or a pair of single
// loads/stores. Returns true (and erases the original) when a rewrite
// happened; MBBI is updated past the erased instruction.
1495 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1496 MachineBasicBlock::iterator &MBBI) {
1497 MachineInstr *MI = &*MBBI;
1498 unsigned Opcode = MI->getOpcode();
1499 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1502 const MachineOperand &BaseOp = MI->getOperand(2);
1503 unsigned BaseReg = BaseOp.getReg();
1504 unsigned EvenReg = MI->getOperand(0).getReg();
1505 unsigned OddReg = MI->getOperand(1).getReg();
// Dwarf numbers are used to check for register-number adjacency below.
1506 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1507 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1509 // ARM errata 602117: LDRD with base in list may result in incorrect base
1510 // register when interrupted or faulted.
1511 bool Errata602117 = EvenReg == BaseReg &&
1512 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1513 // ARM LDRD/STRD needs consecutive registers.
1514 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1515 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
// Nothing to fix: the instruction is legal as-is.
1517 if (!Errata602117 && !NonConsecutiveRegs)
1520 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1521 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// For loads the def may be dead; for stores the use may be a kill.
1522 bool EvenDeadKill = isLd ?
1523 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1524 bool EvenUndef = MI->getOperand(0).isUndef();
1525 bool OddDeadKill = isLd ?
1526 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1527 bool OddUndef = MI->getOperand(1).isUndef();
1528 bool BaseKill = BaseOp.isKill();
1529 bool BaseUndef = BaseOp.isUndef();
// t2 forms have no separate offset-register operand.
1530 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1531 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1532 int OffImm = getMemoryOpOffset(MI);
1533 unsigned PredReg = 0;
1534 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1536 if (OddRegNum > EvenRegNum && OffImm == 0) {
1537 // Ascending register numbers and no offset. It's safe to change it to a
1539 unsigned NewOpc = (isLd)
1540 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1541 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1543 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1544 .addReg(BaseReg, getKillRegState(BaseKill))
1545 .addImm(Pred).addReg(PredReg)
1546 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1547 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1550 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1551 .addReg(BaseReg, getKillRegState(BaseKill))
1552 .addImm(Pred).addReg(PredReg)
1554 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1556 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1560 // Split into two instructions.
1561 unsigned NewOpc = (isLd)
1562 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1563 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1564 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1565 // so adjust and use t2LDRi12 here for that.
1566 unsigned NewOpc2 = (isLd)
1567 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1568 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1569 DebugLoc dl = MBBI->getDebugLoc();
1570 // If this is a load and base register is killed, it may have been
1571 // re-defed by the load, make sure the first load does not clobber it.
// In that case load the odd (higher-addressed) half first.
1573 (BaseKill || OffKill) &&
1574 (TRI->regsOverlap(EvenReg, BaseReg))) {
1575 assert(!TRI->regsOverlap(OddReg, BaseReg));
1576 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1577 OddReg, OddDeadKill, false,
1578 BaseReg, false, BaseUndef, false, OffUndef,
1579 Pred, PredReg, TII, isT2);
1580 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1581 EvenReg, EvenDeadKill, false,
1582 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1583 Pred, PredReg, TII, isT2);
1585 if (OddReg == EvenReg && EvenDeadKill) {
1586 // If the two source operands are the same, the kill marker is
1587 // probably on the first one. e.g.
1588 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1589 EvenDeadKill = false;
1592 // Never kill the base register in the first instruction.
1593 if (EvenReg == BaseReg)
1594 EvenDeadKill = false;
1595 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1596 EvenReg, EvenDeadKill, EvenUndef,
1597 BaseReg, false, BaseUndef, false, OffUndef,
1598 Pred, PredReg, TII, isT2);
1599 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1600 OddReg, OddDeadKill, OddUndef,
1601 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1602 Pred, PredReg, TII, isT2);
// Erase the original LDRD/STRD and advance the caller's iterator.
1610 MBBI = MBB.erase(MBBI);
1614 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1615 /// incrementing offset into LDM / STM ops.
1614b/// Scans the block backwards collecting chains of same-base/same-opcode
1614c/// memory ops into MemOps, forms merge candidates from each chain, then
1614d/// merges candidates and folds base updates. Returns true on any change.
1616 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1618 unsigned CurrBase = 0;
1619 unsigned CurrOpc = ~0u;
1620 ARMCC::CondCodes CurrPred = ARMCC::AL;
1621 unsigned Position = 0;
1622 assert(Candidates.size() == 0);
1623 assert(MergeBaseCandidates.size() == 0);
1624 LiveRegsValid = false;
1626 for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
1628 // The instruction in front of the iterator is the one we look at.
1629 MBBI = std::prev(I);
1630 if (FixInvalidRegPairOp(MBB, MBBI))
1634 if (isMemoryOp(MBBI)) {
1635 unsigned Opcode = MBBI->getOpcode();
1636 const MachineOperand &MO = MBBI->getOperand(0);
1637 unsigned Reg = MO.getReg();
1638 unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
1639 unsigned PredReg = 0;
1640 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1641 int Offset = getMemoryOpOffset(MBBI);
1642 if (CurrBase == 0) {
1643 // Start of a new chain.
1647 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
1650 // Note: No need to match PredReg in the next if.
1651 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1653 // r4 := ldr [r0, #8]
1654 // r4 := ldr [r0, #4]
1657 // If a load overrides the base register or a register loaded by
1658 // another load in our chain, we cannot take this instruction.
1659 bool Overlap = false;
1660 if (isLoadSingle(Opcode)) {
1661 Overlap = (Base == Reg);
1663 for (const MemOpQueueEntry &E : MemOps) {
1664 if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
1673 // Check offset and sort memory operation into the current chain.
1674 if (Offset > MemOps.back().Offset) {
1675 MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
// Insertion sort: find the position keeping MemOps ordered by offset.
1678 MemOpQueue::iterator MI, ME;
1679 for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
1680 if (Offset < MI->Offset) {
1681 // Found a place to insert.
1684 if (Offset == MI->Offset) {
1685 // Collision, abort.
1690 if (MI != MemOps.end()) {
1691 MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
1698 // Don't advance the iterator; The op will start a new chain next.
1701 // Fallthrough to look into existing chain.
1702 } else if (MBBI->isDebugValue()) {
1704 } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
1705 MBBI->getOpcode() == ARM::t2STRDi8) {
1706 // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
1707 // remember them because we may still be able to merge add/sub into them.
1708 MergeBaseCandidates.push_back(MBBI);
1712 // If we are here then the chain is broken; Extract candidates for a merge.
1713 if (MemOps.size() > 0) {
1714 FormCandidates(MemOps);
1715 // Reset for the next chain.
1718 CurrPred = ARMCC::AL;
// Flush the final chain at the end of the block.
1722 if (MemOps.size() > 0)
1723 FormCandidates(MemOps);
1725 // Sort candidates so they get processed from end to begin of the basic
1726 // block later; This is necessary for liveness calculation.
1727 auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
1728 return M0->InsertPos < M1->InsertPos;
1730 std::sort(Candidates.begin(), Candidates.end(), LessThan);
1732 // Go through list of candidates and merge.
1733 bool Changed = false;
1734 for (const MergeCandidate *Candidate : Candidates) {
1735 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
1736 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
1737 // Merge preceding/trailing base inc/dec into the merged op.
1740 unsigned Opcode = Merged->getOpcode();
1741 if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
1742 MergeBaseUpdateLSDouble(*Merged);
1744 MergeBaseUpdateLSMultiple(Merged);
// Merge failed: still try folding base updates into each single op.
1746 for (MachineInstr *MI : Candidate->Instrs) {
1747 if (MergeBaseUpdateLoadStore(MI))
1752 assert(Candidate->Instrs.size() == 1);
1753 if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
1758 // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
1759 for (MachineInstr *MI : MergeBaseCandidates)
1760 MergeBaseUpdateLSDouble(*MI);
1761 MergeBaseCandidates.clear();
1766 /// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1767 /// into the preceding stack restore so it directly restores the value of LR
1769 /// ldmfd sp!, {..., lr}
1772 /// ldmfd sp!, {..., lr}
1775 /// ldmfd sp!, {..., pc}
1776 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1777 // Thumb1 LDM doesn't allow high registers.
1778 if (isThumb1) return false;
1779 if (MBB.empty()) return false;
1781 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1782 if (MBBI != MBB.begin() &&
1783 (MBBI->getOpcode() == ARM::BX_RET ||
1784 MBBI->getOpcode() == ARM::tBX_RET ||
1785 MBBI->getOpcode() == ARM::MOVPCLR)) {
1786 MachineInstr *PrevMI = std::prev(MBBI);
1787 unsigned Opcode = PrevMI->getOpcode();
1788 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1789 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1790 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
// LR must be the last register in the LDM's list for the pc substitution
// to be valid.
1791 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1792 if (MO.getReg() != ARM::LR)
1794 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1795 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1796 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1797 PrevMI->setDesc(TII->get(NewOpc));
// Keep implicit uses/defs of the return instruction on the merged LDM.
1799 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
// Pass entry point: cache subtarget/target info, then run the load/store
// multiple optimization (and, on v5T+, the return merge) on every block.
// Returns true if the function was modified.
1807 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1809 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1810 TL = STI->getTargetLowering();
1811 AFI = Fn.getInfo<ARMFunctionInfo>();
1812 TII = STI->getInstrInfo();
1813 TRI = STI->getRegisterInfo();
1814 MRI = &Fn.getRegInfo();
1815 RegClassInfoValid = false;
1816 isThumb2 = AFI->isThumb2Function();
1817 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1819 bool Modified = false;
1820 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1822 MachineBasicBlock &MBB = *MFI;
1823 Modified |= LoadStoreMultipleOpti(MBB);
// MergeReturnIntoLDM requires pc-in-register-list semantics (v5T+).
1824 if (STI->hasV5TOps())
1825 Modified |= MergeReturnIntoLDM(MBB);
1833 /// Pre- register allocation pass that move load / stores from consecutive
1834 /// locations close to make it more likely they will be combined later.
1835 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1837 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached per-function target/subtarget state, set in runOnMachineFunction.
1839 const DataLayout *TD;
1840 const TargetInstrInfo *TII;
1841 const TargetRegisterInfo *TRI;
1842 const ARMSubtarget *STI;
1843 MachineRegisterInfo *MRI;
1844 MachineFunction *MF;
1846 bool runOnMachineFunction(MachineFunction &Fn) override;
1848 const char *getPassName() const override {
1849 return "ARM pre- register allocation load / store optimization pass";
// Helpers: CanFormLdStDWord checks whether two adjacent loads/stores can be
// combined into an LDRD/STRD; RescheduleOps moves same-base ops together;
// RescheduleLoadStoreInstrs drives the per-block rescheduling.
1853 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1854 unsigned &NewOpc, unsigned &EvenReg,
1855 unsigned &OddReg, unsigned &BaseReg,
1857 unsigned &PredReg, ARMCC::CondCodes &Pred,
1859 bool RescheduleOps(MachineBasicBlock *MBB,
1860 SmallVectorImpl<MachineInstr *> &Ops,
1861 unsigned Base, bool isLd,
1862 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1863 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification (address used as a unique ID).
1865 char ARMPreAllocLoadStoreOpt::ID = 0;
// Pre-RA pass entry point: cache target info and reschedule loads/stores in
// every block. Returns true if anything moved.
1868 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1869 TD = &Fn.getDataLayout();
1870 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1871 TII = STI->getInstrInfo();
1872 TRI = STI->getRegisterInfo();
1873 MRI = &Fn.getRegInfo();
1876 bool Modified = false;
1877 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1879 Modified |= RescheduleLoadStoreInstrs(MFI);
// Decide whether the memory ops in \p MemOps can be moved together across
// the instructions in [I, E) without changing behavior, and whether doing so
// keeps the register-pressure increase acceptable.
1884 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1885 MachineBasicBlock::iterator I,
1886 MachineBasicBlock::iterator E,
1887 SmallPtrSetImpl<MachineInstr*> &MemOps,
1888 SmallSet<unsigned, 4> &MemRegs,
1889 const TargetRegisterInfo *TRI) {
1890 // Are there stores / loads / calls between them?
1891 // FIXME: This is overly conservative. We should make use of alias information
1893 SmallSet<unsigned, 4> AddedRegPressure;
1895 if (I->isDebugValue() || MemOps.count(&*I))
1897 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
// Moving loads across an intervening store could change loaded values.
1899 if (isLd && I->mayStore())
1904 // It's not safe to move the first 'str' down.
1907 // str r4, [r0, #+4]
// Any redefinition of the base register between the ops blocks the move;
// other defined registers count toward the pressure estimate.
1911 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1912 MachineOperand &MO = I->getOperand(j);
1915 unsigned Reg = MO.getReg();
1916 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1918 if (Reg != Base && !MemRegs.count(Reg))
1919 AddedRegPressure.insert(Reg);
1923 // Estimate register pressure increase due to the transformation.
1924 if (MemRegs.size() <= 4)
1925 // Ok if we are moving small number of instructions.
1927 return AddedRegPressure.size() <= MemRegs.size() * 2;
1931 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
1931b/// Allocates a fresh memoperand array on the function and copies both
1931c/// instructions' memoperands into it, so the combined instruction keeps
1931d/// accurate aliasing information.
1932 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1933 MachineInstr *Op1) {
1934 assert(MI->memoperands_empty() && "expected a new machineinstr");
1935 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1936 + (Op1->memoperands_end() - Op1->memoperands_begin());
1938 MachineFunction *MF = MI->getParent()->getParent();
1939 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1940 MachineSDNode::mmo_iterator MemEnd =
1941 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1943 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1944 MI->setMemRefs(MemBegin, MemEnd);
// Check whether two single loads/stores (Op0 at the lower address, Op1 right
// after it) can be combined into one LDRD/STRD (or t2 equivalent). On
// success fills in the new opcode, registers, encoded offset, predicate and
// debug location for the caller to build the paired instruction.
1948 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1949 DebugLoc &dl, unsigned &NewOpc,
1951 unsigned &SecondReg,
1952 unsigned &BaseReg, int &Offset,
1954 ARMCC::CondCodes &Pred,
1956 // Make sure we're allowed to generate LDRD/STRD.
1957 if (!STI->hasV5TEOps())
1960 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Only plain immediate-offset i32 loads/stores are candidates.
1962 unsigned Opcode = Op0->getOpcode();
1963 if (Opcode == ARM::LDRi12) {
1965 } else if (Opcode == ARM::STRi12) {
1967 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1968 NewOpc = ARM::t2LDRDi8;
1971 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1972 NewOpc = ARM::t2STRDi8;
1979 // Make sure the base address satisfies i64 ld / st alignment requirement.
1980 // At the moment, we ignore the memoryoperand's value.
1981 // If we want to use AliasAnalysis, we should check it accordingly.
1982 if (!Op0->hasOneMemOperand() ||
1983 (*Op0->memoperands_begin())->isVolatile())
1986 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1987 const Function *Func = MF->getFunction();
1988 unsigned ReqAlign = STI->hasV6Ops()
1989 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1990 : 8; // Pre-v6 need 8-byte align
1991 if (Align < ReqAlign)
1994 // Then make sure the immediate offset fits.
1995 int OffImm = getMemoryOpOffset(Op0)
1997 int Limit = (1 << 8) * Scale;
1998 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2002 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2004 AddSub = ARM_AM::sub;
2007 int Limit = (1 << 8) * Scale;
2008 if (OffImm >= Limit || (OffImm & (Scale-1)))
// ARM-mode LDRD/STRD use addrmode3; encode direction + magnitude.
2010 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// LDRD cannot load the same register twice.
2012 FirstReg = Op0->getOperand(0).getReg();
2013 SecondReg = Op1->getOperand(0).getReg();
2014 if (FirstReg == SecondReg)
2016 BaseReg = Op0->getOperand(1).getReg();
2017 Pred = getInstrPredicate(Op0, PredReg);
2018 dl = Op0->getDebugLoc();
// Given all loads (or stores) off the same base register in a block, move
// runs of consecutive-offset ops next to each other so the post-RA pass can
// merge them, and opportunistically pair two adjacent ops into LDRD/STRD.
// MI2LocMap gives each instruction's original position for ordering checks.
// Returns true if anything was moved or combined.
2022 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
2023 SmallVectorImpl<MachineInstr *> &Ops,
2024 unsigned Base, bool isLd,
2025 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
2026 bool RetVal = false;
2028 // Sort by offset (in reverse order).
2029 std::sort(Ops.begin(), Ops.end(),
2030 [](const MachineInstr *LHS, const MachineInstr *RHS) {
2031 int LOffset = getMemoryOpOffset(LHS);
2032 int ROffset = getMemoryOpOffset(RHS);
2033 assert(LHS == RHS || LOffset != ROffset);
2034 return LOffset > ROffset;
2037 // The loads / stores of the same base are in order. Scan them from first to
2038 // last and check for the following:
2039 // 1. Any def of base.
2041 while (Ops.size() > 1) {
2042 unsigned FirstLoc = ~0U;
2043 unsigned LastLoc = 0;
2044 MachineInstr *FirstOp = nullptr;
2045 MachineInstr *LastOp = nullptr;
2047 unsigned LastOpcode = 0;
2048 unsigned LastBytes = 0;
2049 unsigned NumMove = 0;
// Walk Ops from lowest offset (back of the vector) upward, growing the run
// of consecutive same-opcode ops to move together.
2050 for (int i = Ops.size() - 1; i >= 0; --i) {
2051 MachineInstr *Op = Ops[i];
2052 unsigned Loc = MI2LocMap[Op];
2053 if (Loc <= FirstLoc) {
2057 if (Loc >= LastLoc) {
2063 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
2064 if (LastOpcode && LSMOpcode != LastOpcode)
// The run must be contiguous: each op exactly LastBytes past the previous.
2067 int Offset = getMemoryOpOffset(Op);
2068 unsigned Bytes = getLSMultipleTransferSize(Op);
2070 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
2073 LastOffset = Offset;
2075 LastOpcode = LSMOpcode;
2076 if (++NumMove == 8) // FIXME: Tune this limit.
2083 SmallPtrSet<MachineInstr*, 4> MemOps;
2084 SmallSet<unsigned, 4> MemRegs;
2085 for (int i = NumMove-1; i >= 0; --i) {
2086 MemOps.insert(Ops[i]);
2087 MemRegs.insert(Ops[i]->getOperand(0).getReg());
2090 // Be conservative, if the instructions are too far apart, don't
2091 // move them. We want to limit the increase of register pressure.
2092 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
2094 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
2095 MemOps, MemRegs, TRI);
2097 for (unsigned i = 0; i != NumMove; ++i)
2100 // This is the new location for the loads / stores.
2101 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
2102 while (InsertPos != MBB->end()
2103 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
2106 // If we are moving a pair of loads / stores, see if it makes sense
2107 // to try to allocate a pair of registers that can form register pairs.
2108 MachineInstr *Op0 = Ops.back();
2109 MachineInstr *Op1 = Ops[Ops.size()-2];
2110 unsigned FirstReg = 0, SecondReg = 0;
2111 unsigned BaseReg = 0, PredReg = 0;
2112 ARMCC::CondCodes Pred = ARMCC::AL;
2114 unsigned NewOpc = 0;
2117 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
2118 FirstReg, SecondReg, BaseReg,
2119 Offset, PredReg, Pred, isT2)) {
// Constrain both registers to the class required by LDRD/STRD so the
// allocator cannot pick an illegal pair.
2123 const MCInstrDesc &MCID = TII->get(NewOpc);
2124 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
2125 MRI->constrainRegClass(FirstReg, TRC);
2126 MRI->constrainRegClass(SecondReg, TRC);
2128 // Form the pair instruction.
2130 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2131 .addReg(FirstReg, RegState::Define)
2132 .addReg(SecondReg, RegState::Define)
2134 // FIXME: We're converting from LDRi12 to an insn that still
2135 // uses addrmode2, so we need an explicit offset reg. It should
2136 // always by reg0 since we're transforming LDRi12s.
2139 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2140 concatenateMemOperands(MIB, Op0, Op1);
2141 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2144 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
2148 // FIXME: We're converting from LDRi12 to an insn that still
2149 // uses addrmode2, so we need an explicit offset reg. It should
2150 // always by reg0 since we're transforming STRi12s.
2153 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
2154 concatenateMemOperands(MIB, Op0, Op1);
2155 DEBUG(dbgs() << "Formed " << *MIB << "\n");
2162 // Add register allocation hints to form register pairs.
2163 MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
2164 MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
// No LDRD/STRD formed: just splice the run of ops to the insert point.
2167 for (unsigned i = 0; i != NumMove; ++i) {
2168 MachineInstr *Op = Ops.back();
2170 MBB->splice(InsertPos, MBB, Op);
2174 NumLdStMoved += NumMove;
// NOTE(review): the return type line preceding this definition and its closing
// brace are not visible in this view, and several interior lines are elided;
// comments below describe only the visible code.
//
// Bucket the block's simple loads and stores by base register, detect
// duplicate base+offset accesses, and hand each bucket to RescheduleOps so
// same-base accesses can be moved next to each other. Returns true (via
// RetVal) if anything was changed.
2184 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
2185 bool RetVal = false;
// Per-region state: a program-order index for each instruction, plus
// base-register -> candidate-instruction maps (loads and stores kept apart).
2187 DenseMap<MachineInstr*, unsigned> MI2LocMap;
2188 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
2189 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
2190 SmallVector<unsigned, 4> LdBases;
2191 SmallVector<unsigned, 4> StBases;
2194 MachineBasicBlock::iterator MBBI = MBB->begin();
2195 MachineBasicBlock::iterator E = MBB->end();
2197 for (; MBBI != E; ++MBBI) {
2198 MachineInstr *MI = MBBI;
// Calls and terminators are scheduling barriers; do not move loads/stores
// across them.
2199 if (MI->isCall() || MI->isTerminator()) {
2200 // Stop at barriers.
// Assign a monotonically increasing location index to every real (non
// debug-value) instruction; used later to measure distance between ops.
2205 if (!MI->isDebugValue())
2206 MI2LocMap[MI] = ++Loc;
2208 if (!isMemoryOp(MI))
// Only unconditional (AL-predicated) memory ops are candidates.
2210 unsigned PredReg = 0;
2211 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
2214 int Opc = MI->getOpcode();
2215 bool isLd = isLoadSingle(Opc);
2216 unsigned Base = MI->getOperand(1).getReg();
2217 int Offset = getMemoryOpOffset(MI);
2219 bool StopHere = false;
// Load path: if this base was seen before, check for an existing entry with
// the same offset (a duplicate access); otherwise start a new bucket.
2221 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2222 Base2LdsMap.find(Base);
2223 if (BI != Base2LdsMap.end()) {
2224 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2225 if (Offset == getMemoryOpOffset(BI->second[i])) {
2231 BI->second.push_back(MI);
2233 Base2LdsMap[Base].push_back(MI);
2234 LdBases.push_back(Base);
// Store path: mirror of the load bookkeeping above.
2237 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
2238 Base2StsMap.find(Base);
2239 if (BI != Base2StsMap.end()) {
2240 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
2241 if (Offset == getMemoryOpOffset(BI->second[i])) {
2247 BI->second.push_back(MI);
2249 Base2StsMap[Base].push_back(MI);
2250 StBases.push_back(Base);
2255 // Found a duplicate (a base+offset combination that's seen earlier).
2262 // Re-schedule loads.
2263 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
2264 unsigned Base = LdBases[i];
2265 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
2267 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2270 // Re-schedule stores.
2271 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2272 unsigned Base = StBases[i];
2273 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2275 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset per-region state before processing the next region of the block.
2279 Base2LdsMap.clear();
2280 Base2StsMap.clear();
2290 /// Returns an instance of the load / store optimization pass.
/// \param PreAlloc selects the pre-register-allocation variant of the pass.
2291 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
// NOTE(review): the guard between these two returns (presumably
// `if (PreAlloc)`, original line 2292) is elided in this view — as shown the
// second return is unreachable. Confirm against the upstream file.
2293 return new ARMPreAllocLoadStoreOpt();
2294 return new ARMLoadStoreOpt();