//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"

using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed, "Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed, "Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// Post-register allocation pass that combines load / store instructions to
/// form ldm / stm instructions.
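///
/// For example, two word loads from consecutive addresses
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
/// can be combined into the single
///   ldmia r0, {r4, r5}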
struct ARMLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  const MachineFunction *MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const MachineRegisterInfo *MRI;
  const ARMSubtarget *STI;
  const TargetLowering *TL;
  ARMFunctionInfo *AFI;

  LivePhysRegs LiveRegs;
  RegisterClassInfo RegClassInfo;
  MachineBasicBlock::const_iterator LiveRegPos;
  bool LiveRegsValid;
  bool RegClassInfoValid;
  bool isThumb1, isThumb2;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "ARM load / store optimization pass";
  }

private:
  /// A set of load/store MachineInstrs with same base register sorted by
  /// offset.
  struct MemOpQueueEntry {
    MachineInstr *MI;
    int Offset;        ///< Load/Store offset.
    unsigned Position; ///< Position as counted from end of basic block.
    MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
        : MI(MI), Offset(Offset), Position(Position) {}
  };
  typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;

  /// A set of MachineInstrs that fulfill (nearly all) conditions to get
  /// merged into a LDM/STM.
  struct MergeCandidate {
    /// List of instructions ordered by load/store offset.
    SmallVector<MachineInstr*, 4> Instrs;
    /// Index in Instrs of the instruction being latest in the schedule.
    unsigned LatestMIIdx;
    /// Index in Instrs of the instruction being earliest in the schedule.
    unsigned EarliestMIIdx;
    /// Index into the basic block where the merged instruction will be
    /// inserted. (See MemOpQueueEntry.Position)
    unsigned InsertPos;
  };
  BumpPtrAllocator Allocator;
  SmallVector<const MergeCandidate*, 4> Candidates;

  void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                          MachineBasicBlock::const_iterator Before);
  unsigned findFreeReg(const TargetRegisterClass &RegClass);
  void UpdateBaseRegUses(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         DebugLoc DL, unsigned Base, unsigned WordOffset,
                         ARMCC::CondCodes Pred, unsigned PredReg);
  MachineInstr *MergeOps(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator InsertBefore, int Offset,
                         unsigned Base, bool BaseKill, unsigned Opcode,
                         ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
                         ArrayRef<std::pair<unsigned, bool>> Regs);
  void FormCandidates(const MemOpQueue &MemOps);
  MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineInstr *MI);
  bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
char ARMLoadStoreOpt::ID = 0;

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }
  return false;
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands - 3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}
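
// A load/store multiple's AMSubMode selects how the transfer address steps
// from the base register: ia/ib increment by 4 bytes after/before each
// transfer, da/db decrement after/before. For example, "ldmdb r0, {r1, r2}"
// loads r1 from [r0, #-8] and r2 from [r0, #-4].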
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
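/// For example, after a merged "tLDMIA r0!, {r1, r2}" has advanced r0 by two
/// words, a later "ldr r3, [r0, #12]" is rewritten to "ldr r3, [r0, #4]";
/// where an instruction cannot be rewritten, a SUB restoring the old base
/// value is inserted instead.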
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}
/// Return the first register of class \p RegClass that is not in \p Regs.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// the newly created instruction, or nullptr if the merge is not possible.
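/// For example, with a word-store Opcode, Base = r4, Offset = 0 and
/// Regs = {(r0, kill), (r1, kill)}, this emits "stmia r4, {r0, r1}".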
MachineInstr *
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator InsertBefore, int Offset,
                          unsigned Base, bool BaseKill, unsigned Opcode,
                          ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
                          ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Offset = 0;
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}
/// Call MergeOps and update MemOps and merges accordingly on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers, after merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = MergeOps(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
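/// For example, a queue holding "ldr r1, [r4]", "ldr r2, [r4, #4]" and
/// "ldr r3, [r4, #8]" yields a single candidate containing all three loads.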
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;

    // Merge additional instructions fulfilling LDM/STM constraints.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP)
        break;
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      // Register numbers must be in ascending order.
      if (RegNum <= PRegNum)
        break;
      // For VFP / NEON load/store multiples, the registers must be consecutive
      // and within the limit on the number of registers per instruction.
      if (!isNotVFP && RegNum != PRegNum+1)
        break;
      // On Swift we don't want vldm/vstm to start with an odd register num
      // because Q register unaligned vldm/vstm need more uops.
      if (!isNotVFP && STI->isSwift() && Count == 1 && (PRegNum % 2) == 1)
        break;

      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}
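
/// Return true if \p MI is an instruction that subtracts \p Bytes from
/// register \p Base under predicate \p Pred; e.g. with Base = r4 and
/// Bytes = 8, "subs r4, r4, #8" matches. A nonzero \p Limit bounds the
/// acceptable immediate.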
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(NextMBBI);
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
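///
/// ldr r1, [r0]
/// add r0, r0, #4
/// =>
/// ldr r1, [r0], #4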
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isLoadSingle(Opcode);
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator MBBI(MI);
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load; make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}
/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
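/// E.g. the chain
///   ldr r2, [r0, #4]
///   ldr r1, [r0]
/// is collected, sorted by offset, and merged into
///   ldmia r0, {r1, r2}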
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap)
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; The op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue())
      continue;

    // If we are here then the chain is broken; Extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from end to begin of the basic
  // block later; This is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->Instrs.size() > 1) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        MergeBaseUpdateLSMultiple(Merged);
        Changed = true;
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();

  return Changed;
}
/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
/// into the preceding stack restore so it directly restores the value of LR
/// into the PC.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  return Modified;
}
/// Pre-register allocation pass that moves load / store instructions from
/// consecutive locations close together to make it more likely they will be
/// combined later.
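///
/// For example
///   ldr r1, [r0]
///   add r6, r6, #1
///   ldr r2, [r0, #4]
/// is rescheduled so the two loads are adjacent, which lets them later be
/// merged into an ldrd or a load multiple.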
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

  const DataLayout *TD;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "ARM pre- register allocation load / store optimization pass";
  }

private:
  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &FirstReg,
                        unsigned &SecondReg, unsigned &BaseReg,
                        int &Offset,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
                        bool &isT2);
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVectorImpl<MachineInstr *> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
};
char ARMPreAllocLoadStoreOpt::ID = 0;

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = &Fn.getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // if available.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8; // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
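
/// Try to move the loads / stores in \p Ops (which all access base register
/// \p Base at distinct offsets) next to each other, turning an adjacent pair
/// into an LDRD / STRD when CanFormLdStDWord allows it.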
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVectorImpl<MachineInstr *> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
              int LOffset = getMemoryOpOffset(LHS);
              int ROffset = getMemoryOpOffset(RHS);
              assert(LHS == RHS || LOffset != ROffset);
              return LOffset > ROffset;
            });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, getKillRegState(Op0->getOperand(0).isKill()))
              .addReg(SecondReg, getKillRegState(Op1->getOperand(0).isKill()))
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);
          RetVal = true;

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
          MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// Returns an instance of the load / store optimization pass.
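/// The ARM target normally schedules both variants from its pass
/// configuration: the pre-allocation pass before register allocation and
/// this post-allocation pass afterwards.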
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}