//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
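
// A worked example (illustrative; registers and offsets are arbitrary):
//
//   ldr r1, [r0]
//   ldr r2, [r0, #4]        =>      ldmia r0, {r1, r2, r3}
//   ldr r3, [r0, #8]
//
// The merge requires a common base register, consecutive offsets, and
// register numbers that ascend with the offsets.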
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterClassInfo.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/CodeGen/LivePhysRegs.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetRegisterInfo.h"

using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

namespace {
  /// Post-register allocation pass that combines load / store instructions
  /// to form ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const MachineRegisterInfo *MRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;

    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
          : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }
  return false;
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4.
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
    : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}
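
// For example, a Thumb1 load "tLDRi r1, r0, #3" encodes the byte offset 12
// (immediate scaled by 4), whereas a t2LDRi12 immediate is already a plain
// byte offset.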

static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}

static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // merged offset is zero.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}
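
// Illustrative scenario (register names are arbitrary): after merging two
// loads into "tLDMIA r0!, {r1, r2}", r0 has advanced by 8 bytes, so a later
// "tLDRi r3, r0, #2" (byte offset 8) is rewritten to offset #0. If a rewritten
// offset would become negative, a "SUBS r0, #8" is inserted instead and
// updating stops there.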

/// Return the first register of class \p RegClass that is not in \p Regs.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// true if the transformation is done.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}
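
// Illustrative submode selection for a three-register merge (registers are
// arbitrary): a starting offset of 4 yields LDMIB/STMIB, -8 yields
// LDMDA/STMDA, -12 yields LDMDB/STMDB, and 0 yields LDMIA/STMIA. Any other
// offset requires materializing a new base register first.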

MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}

/// Call MergeOps and update MemOps and merges accordingly on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    // Collect any implicit defs of super-registers, after merging we can't
    // be sure anymore that we properly preserved these live ranges and must
    // remove these implicit operands.
    for (const MachineOperand &MO : MI->implicit_operands()) {
      if (!MO.isReg() || !MO.isDef() || MO.isDead())
        continue;
      assert(MO.isImplicit());
      unsigned DefReg = MO.getReg();

      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
        continue;
      // We can ignore cases where the super-reg is read and written.
      if (MI->readsRegister(DefReg))
        continue;
      ImpDefs.push_back(DefReg);
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          MO.setIsUndef();
          ImpDefReg = 0;
          break;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}
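
// For example, offsets 0, 4, ..., 1020 pass this check, while 1024 (out of
// range) and 6 (not a multiple of 4) do not.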

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;

    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, a vldm/vstm starting with an odd register number needs more
    // uops than single vldrs, so don't merge those.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}
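
// Worked example (illustrative): for the queue
//   ldr r1, [r0]      (offset 0)
//   ldr r2, [r0, #4]  (offset 4)
//   ldr r5, [r0, #8]  (offset 8)
// all three instructions form one candidate that can merge to an LDM (the
// encodings ascend), while only a two-instruction prefix could ever merge to
// an LDRD.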

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
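
// For example, a "subs r0, r0, #8" immediately before "ldmia r0, {r1, r2}"
// matches with Bytes == 8 and lets the caller fold the decrement into
// "ldmdb r0!, {r1, r2}".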

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(NextMBBI);
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isLoadSingle(Opcode);
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
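
// Illustrative foldings (registers are arbitrary):
//   ldr r1, [r0]; add r0, r0, #4   =>   ldr r1, [r0], #4    (post-indexed)
//   sub r0, r0, #4; ldr r1, [r0]   =>   ldr r1, [r0, #-4]!  (pre-indexed)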

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill  = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}
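
// For example, "LDRD r1, r2, [r0]" is not a valid ARM register pair (the
// first register must be even); since the numbers still ascend and the
// offset is zero, it becomes "LDMIA r0, {r1, r2}". A descending pair such as
// "LDRD r3, r1, [r0, #4]" is split into two LDRs at offsets 4 and 8 instead.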

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap)
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; The op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue())
      continue;

    // If we are here then the chain is broken; Extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from end to begin of the basic
  // block later; This is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();

  return Changed;
}
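
// Illustrative chain handling (registers are arbitrary): scanning the block
// bottom-up, "ldr r1, [r0]; ldr r3, [r0, #8]; ldr r2, [r0, #4]" still yields
// one candidate, because each MemOp is inserted into the queue sorted by
// offset (0, 4, 8) before FormCandidates runs.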

/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
/// into the preceding stack restore so it directly restores the value of LR
/// into the PC.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}

namespace {
  /// Pre-register allocation pass that moves loads / stores from consecutive
  /// locations closer together to make it more likely they will be combined
  /// later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &FirstReg,
                          unsigned &SecondReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = &Fn.getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
1868 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1869 MachineBasicBlock::iterator I,
1870 MachineBasicBlock::iterator E,
1871 SmallPtrSetImpl<MachineInstr*> &MemOps,
1872 SmallSet<unsigned, 4> &MemRegs,
1873 const TargetRegisterInfo *TRI) {
1874 // Are there stores / loads / calls between them?
1875 // FIXME: This is overly conservative. We should make use of alias information
1877 SmallSet<unsigned, 4> AddedRegPressure;
1879 if (I->isDebugValue() || MemOps.count(&*I))
1881 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1883 if (isLd && I->mayStore())
1888 // It's not safe to move the first 'str' down.
1891 // str r4, [r0, #+4]
1895 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1896 MachineOperand &MO = I->getOperand(j);
1899 unsigned Reg = MO.getReg();
1900 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1902 if (Reg != Base && !MemRegs.count(Reg))
1903 AddedRegPressure.insert(Reg);
1907 // Estimate register pressure increase due to the transformation.
1908 if (MemRegs.size() <= 4)
1909 // Ok if we are moving small number of instructions.
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
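// Note (explanatory, not in the original source): keeping both memory
// operands on the merged LDRD/STRD lets later passes that inspect memory
// references, such as the post-RA scheduler, still see the two underlying
// locations instead of treating the access as unknown.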
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8; // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
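  // For instance (derived from the checks below): ARM LDRD/STRD use
  // addrmode3, so Scale == 1 and any offset in (-256, 256) is acceptable;
  // Thumb2 t2LDRDi8/t2STRDi8 use Scale == 4, so the offset must be a
  // multiple of 4 in (-1024, 1024).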
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

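  // After the sort, Ops runs from the highest offset down to the lowest, so
  // Ops.back() is always the access with the smallest offset; the code below
  // relies on this when it pops candidates off the back.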
  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
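      // E.g. (illustrative): four combinable ops may be moved only if they
      // sit within a window of 16 instruction slots (4 * 4).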
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
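        // E.g. (an illustrative sketch): "ldr r2, [r0]" + "ldr r3, [r0, #4]"
        // can become "ldrd r2, r3, [r0]" provided the destinations land in an
        // even/odd register pair; the allocation hints set below encourage
        // the register allocator to pick such a pair.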
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
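        // Explanatory note (not in the original comment): a second access to
        // the same base+offset would violate RescheduleOps' assumption that
        // offsets within a group are distinct (see the assert in its sort
        // predicate), so reschedule what has been collected so far and
        // restart the scan from this instruction.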
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}