1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This file contains a pass that performs load / store related peephole
11 /// optimizations. This pass should be run after register allocation.
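///
/// As an illustrative example (register names and offsets are hypothetical),
/// a sequence such as
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// can be combined by this pass into a single
///   ldmia r4, {r0, r1, r2}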
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterScavenging.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Target/TargetInstrInfo.h"
43 #include "llvm/Target/TargetMachine.h"
44 #include "llvm/Target/TargetRegisterInfo.h"
47 #define DEBUG_TYPE "arm-ldst-opt"
49 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
50 STATISTIC(NumSTMGened , "Number of stm instructions generated");
51 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
52 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
53 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
54 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
55 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
56 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
57 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
58 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
59 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
62 /// Post-register-allocation pass that combines load / store instructions
63 /// to form ldm / stm instructions.
64 struct ARMLoadStoreOpt : public MachineFunctionPass {
66 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
68 const TargetInstrInfo *TII;
69 const TargetRegisterInfo *TRI;
70 const ARMSubtarget *STI;
71 const TargetLowering *TL;
74 bool isThumb1, isThumb2;
76 bool runOnMachineFunction(MachineFunction &Fn) override;
78 const char *getPassName() const override {
79 return "ARM load / store optimization pass";
83 struct MemOpQueueEntry {
88 MachineBasicBlock::iterator MBBI;
90 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
91 MachineBasicBlock::iterator i)
92 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
94 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
95 typedef MemOpQueue::iterator MemOpQueueIter;
97 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
98 const MemOpQueue &MemOps, unsigned DefReg,
99 unsigned RangeBegin, unsigned RangeEnd);
100 void UpdateBaseRegUses(MachineBasicBlock &MBB,
101 MachineBasicBlock::iterator MBBI,
102 DebugLoc dl, unsigned Base, unsigned WordOffset,
103 ARMCC::CondCodes Pred, unsigned PredReg);
104 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
105 int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
106 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
108 ArrayRef<std::pair<unsigned, bool> > Regs,
109 ArrayRef<unsigned> ImpDefs);
110 void MergeOpsUpdate(MachineBasicBlock &MBB,
112 unsigned memOpsBegin,
114 unsigned insertAfter,
119 ARMCC::CondCodes Pred,
123 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
124 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
125 unsigned Opcode, unsigned Size,
126 ARMCC::CondCodes Pred, unsigned PredReg,
127 unsigned Scratch, MemOpQueue &MemOps,
128 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
129 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
130 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
131 MachineBasicBlock::iterator &MBBI);
132 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
133 MachineBasicBlock::iterator MBBI,
134 const TargetInstrInfo *TII,
136 MachineBasicBlock::iterator &I);
137 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
138 MachineBasicBlock::iterator MBBI,
140 MachineBasicBlock::iterator &I);
141 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
142 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
144 char ARMLoadStoreOpt::ID = 0;
147 static bool definesCPSR(const MachineInstr *MI) {
148 for (const auto &MO : MI->operands()) {
151 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
152 // If the instruction has a live CPSR def, then it's not safe to fold it
153 // into load / store.
160 static int getMemoryOpOffset(const MachineInstr *MI) {
161 unsigned Opcode = MI->getOpcode();
162 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
163 unsigned NumOperands = MI->getDesc().getNumOperands();
164 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
166 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
167 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
168 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
169 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
172 // Thumb1 immediate offsets are scaled by 4
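// (so, for example, a tLDRi immediate operand of 2 denotes a byte offset of 8)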
173 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
174 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
177 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
178 : ARM_AM::getAM5Offset(OffField) * 4;
179 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
180 : ARM_AM::getAM5Op(OffField);
182 if (Op == ARM_AM::sub)
188 static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
190 default: llvm_unreachable("Unhandled opcode!");
194 default: llvm_unreachable("Unhandled submode!");
195 case ARM_AM::ia: return ARM::LDMIA;
196 case ARM_AM::da: return ARM::LDMDA;
197 case ARM_AM::db: return ARM::LDMDB;
198 case ARM_AM::ib: return ARM::LDMIB;
203 default: llvm_unreachable("Unhandled submode!");
204 case ARM_AM::ia: return ARM::STMIA;
205 case ARM_AM::da: return ARM::STMDA;
206 case ARM_AM::db: return ARM::STMDB;
207 case ARM_AM::ib: return ARM::STMIB;
211 // tLDMIA is writeback-only - unless the base register is in the input reglist.
215 default: llvm_unreachable("Unhandled submode!");
216 case ARM_AM::ia: return ARM::tLDMIA;
220 // There is no non-writeback tSTMIA either.
223 default: llvm_unreachable("Unhandled submode!");
224 case ARM_AM::ia: return ARM::tSTMIA_UPD;
230 default: llvm_unreachable("Unhandled submode!");
231 case ARM_AM::ia: return ARM::t2LDMIA;
232 case ARM_AM::db: return ARM::t2LDMDB;
238 default: llvm_unreachable("Unhandled submode!");
239 case ARM_AM::ia: return ARM::t2STMIA;
240 case ARM_AM::db: return ARM::t2STMDB;
245 default: llvm_unreachable("Unhandled submode!");
246 case ARM_AM::ia: return ARM::VLDMSIA;
247 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
252 default: llvm_unreachable("Unhandled submode!");
253 case ARM_AM::ia: return ARM::VSTMSIA;
254 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
259 default: llvm_unreachable("Unhandled submode!");
260 case ARM_AM::ia: return ARM::VLDMDIA;
261 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
266 default: llvm_unreachable("Unhandled submode!");
267 case ARM_AM::ia: return ARM::VSTMDIA;
268 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
273 static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
275 default: llvm_unreachable("Unhandled opcode!");
282 case ARM::tLDMIA_UPD:
283 case ARM::tSTMIA_UPD:
284 case ARM::t2LDMIA_RET:
286 case ARM::t2LDMIA_UPD:
288 case ARM::t2STMIA_UPD:
290 case ARM::VLDMSIA_UPD:
292 case ARM::VSTMSIA_UPD:
294 case ARM::VLDMDIA_UPD:
296 case ARM::VSTMDIA_UPD:
310 case ARM::t2LDMDB_UPD:
312 case ARM::t2STMDB_UPD:
313 case ARM::VLDMSDB_UPD:
314 case ARM::VSTMSDB_UPD:
315 case ARM::VLDMDDB_UPD:
316 case ARM::VSTMDDB_UPD:
327 static bool isT1i32Load(unsigned Opc) {
328 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
331 static bool isT2i32Load(unsigned Opc) {
332 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
335 static bool isi32Load(unsigned Opc) {
336 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
339 static bool isT1i32Store(unsigned Opc) {
340 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
343 static bool isT2i32Store(unsigned Opc) {
344 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
347 static bool isi32Store(unsigned Opc) {
348 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
351 static unsigned getImmScale(unsigned Opc) {
353 default: llvm_unreachable("Unhandled opcode!");
368 /// Update future uses of the base register with the offset introduced
369 /// due to writeback. This function only works on Thumb1.
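///
/// For example (hypothetical registers): once a merge has produced
///   ldmia r4!, {r0, r1, r2}
/// the base r4 ends up 12 bytes higher than before, so a later
///   ldr r5, [r4, #16]
/// has to be rewritten as
///   ldr r5, [r4, #4]
/// and, where an instruction cannot be rewritten that way, a SUBS is inserted
/// to restore the old base value.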
371 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
372 MachineBasicBlock::iterator MBBI,
373 DebugLoc dl, unsigned Base,
375 ARMCC::CondCodes Pred, unsigned PredReg) {
376 assert(isThumb1 && "Can only update base register uses for Thumb1!");
377 // Start updating any instructions with immediate offsets. Insert a SUB before
378 // the first non-updateable instruction (if any).
379 for (; MBBI != MBB.end(); ++MBBI) {
380 bool InsertSub = false;
381 unsigned Opc = MBBI->getOpcode();
383 if (MBBI->readsRegister(Base)) {
386 bool IsLoad = Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
388 bool IsStore = Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
390 if (IsLoad || IsStore) {
391 // Loads and stores with immediate offsets can be updated, but only if
392 // the new offset isn't negative.
393 // The MachineOperand containing the offset immediate is the last one
394 // before predicates.
396 MachineOperand &MO = MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
397 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
398 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
400 // If storing the base register, it needs to be reset first.
401 unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
403 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
408 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
409 !definesCPSR(MBBI)) {
410 // SUBS/ADDS using this register, with a dead def of the CPSR.
411 // Merge it with the update; if the merged offset is too large,
412 // insert a new sub instead.
414 MachineOperand &MO = MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
415 Offset = (Opc == ARM::tSUBi8) ?
416 MO.getImm() + WordOffset * 4 :
417 MO.getImm() - WordOffset * 4 ;
418 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
419 // FIXME: Swap ADDS<->SUBS if Offset < 0, and erase the instruction when the merged offset becomes 0.
422 // The base register has now been reset, so exit early.
429 // Can't update the instruction.
433 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
434 // Since SUBS sets the condition flags, we can't place the base reset
435 // after an instruction that has a live CPSR def.
436 // The base register might also contain an argument for a function call.
441 // An instruction above couldn't be updated, so insert a sub.
442 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
443 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
447 if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
448 // Register got killed. Stop updating.
452 // End of block was reached.
453 if (MBB.succ_size() > 0) {
454 // FIXME: Because of a bug, live registers are sometimes missing from
455 // the successor blocks' live-in sets. This means we can't trust that
456 // information and *always* have to reset at the end of a block.
458 if (MBBI != MBB.end()) --MBBI;
460 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
461 .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
465 /// Create and insert an LDM or STM with Base as base register and registers in
466 /// Regs as the register operands that would be loaded / stored. It returns
467 /// true if the transformation is done.
469 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
470 MachineBasicBlock::iterator MBBI,
471 int Offset, unsigned Base, bool BaseKill,
472 unsigned Opcode, ARMCC::CondCodes Pred,
473 unsigned PredReg, unsigned Scratch, DebugLoc dl,
474 ArrayRef<std::pair<unsigned, bool> > Regs,
475 ArrayRef<unsigned> ImpDefs) {
476 // Only a single register to load / store. Don't bother.
477 unsigned NumRegs = Regs.size();
481 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
482 // Compute liveness information for that register to make the decision.
483 bool SafeToClobberCPSR = !isThumb1 ||
484 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
485 MachineBasicBlock::LQR_Dead);
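// (computeRegisterLiveness inspects a neighborhood of up to 15 instructions
// around the given position; LQR_Dead means CPSR is known to be dead there,
// so a flag-setting ADDS/SUBS may be used safely.)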
487 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
489 // Exception: If the base register is in the input reglist, Thumb1 LDM is non-writeback.
491 // It's also not possible to merge an STR of the base register in Thumb1.
493 for (const std::pair<unsigned, bool> &R : Regs)
494 if (Base == R.first) {
495 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
496 if (Opcode == ARM::tLDRi) {
499 } else if (Opcode == ARM::tSTRi) {
504 ARM_AM::AMSubMode Mode = ARM_AM::ia;
505 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
506 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
507 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
509 if (Offset == 4 && haveIBAndDA) {
511 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
513 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
514 // VLDM/VSTM do not support DB mode without also updating the base reg.
516 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
517 // Check if this is a supported opcode before inserting instructions to
518 // calculate a new base register.
519 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
521 // If starting offset isn't zero, insert a MI to materialize a new base.
522 // But only do so if it is cost effective, i.e. when merging more than two loads / stores.
527 // On Thumb1, it's not worth materializing a new base register without
528 // clobbering the CPSR (i.e. not using ADDS/SUBS).
529 if (!SafeToClobberCPSR)
533 if (isi32Load(Opcode)) {
534 // If it is a load, then just use one of the destination registers
535 // as the new base.
536 NewBase = Regs[NumRegs-1].first;
538 // Otherwise, use the scratch register as the new base.
545 isThumb2 ? ARM::t2ADDri :
546 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
547 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
548 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
553 isThumb2 ? ARM::t2SUBri :
554 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
555 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
558 if (!TL->isLegalAddImmediate(Offset))
559 // FIXME: Try add with register operand?
560 return false; // Probably not worth it then.
563 // Thumb1: depending on immediate size, use either
564 //   ADDS NewBase, Base, #imm3
567 // or MOV NewBase, Base followed by ADDS NewBase, #imm8.
568 if (Base != NewBase &&
569 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
570 // Need to insert a MOV to the new base first.
571 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
573 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
574 if (Pred != ARMCC::AL)
576 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
577 .addReg(Base, getKillRegState(BaseKill));
579 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
580 .addReg(Base, getKillRegState(BaseKill))
581 .addImm(Pred).addReg(PredReg);
583 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
587 if (BaseOpc == ARM::tADDrSPi) {
588 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
589 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
590 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
591 .addImm(Pred).addReg(PredReg);
593 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
594 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
595 .addImm(Pred).addReg(PredReg);
597 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
598 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
599 .addImm(Pred).addReg(PredReg).addReg(0);
602 BaseKill = true; // New base is always killed straight away.
605 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
606 Opcode == ARM::VLDRD);
608 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
609 // base register writeback.
610 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
611 if (!Opcode) return false;
613 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
614 // - There is no writeback (LDM of base register),
615 // - the base register is killed by the merged instruction,
616 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
617 // to reset the base register.
618 // Otherwise, don't merge.
619 // It's safe to return here since the code to materialize a new base register
620 // above is also conditional on SafeToClobberCPSR.
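// For example (hypothetical): merging into "ldmia r4!, {r0, r1, r2}" while r4
// is still live afterwards would need a trailing "subs r4, #12" to restore r4,
// and that is only allowed when CPSR may be clobbered at this point.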
621 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
624 MachineInstrBuilder MIB;
627 if (Opcode == ARM::tLDMIA)
628 // Update tLDMIA with writeback if necessary.
629 Opcode = ARM::tLDMIA_UPD;
631 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
633 // Thumb1: we might need to set base writeback when building the MI.
634 MIB.addReg(Base, getDefRegState(true))
635 .addReg(Base, getKillRegState(BaseKill));
637 // The base isn't dead after a merged instruction with writeback.
638 // Insert a sub instruction after the newly formed instruction to reset.
640 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
643 // No writeback, simply build the MachineInstr.
644 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
645 MIB.addReg(Base, getKillRegState(BaseKill));
648 MIB.addImm(Pred).addReg(PredReg);
650 for (const std::pair<unsigned, bool> &R : Regs)
651 MIB = MIB.addReg(R.first, getDefRegState(isDef)
652 | getKillRegState(R.second));
654 // Add implicit defs for super-registers.
655 for (unsigned ImpDef : ImpDefs)
656 MIB.addReg(ImpDef, RegState::ImplicitDefine);
661 /// Find all instructions using a given imp-def within a range.
663 /// We are trying to combine a range of instructions, one of which (located at
664 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
665 /// be placed at RangeEnd, and so any uses of this definition between RangeBegin
666 /// and RangeEnd must be modified to use an undefined value.
668 /// The live range continues until we find a second definition or one of the
669 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
670 /// we must consider all uses and decide which are relevant in a second pass.
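///
/// For example (hypothetical), if the instruction at RangeBegin is a VLDRS
/// writing s0 that carries an implicit def of the containing d0, a read of d0
/// later in the range would no longer be reached by that def once the loads
/// are merged, so its use operand has to be marked undef.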
671 void ARMLoadStoreOpt::findUsesOfImpDef(
672 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
673 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
674 std::map<unsigned, MachineOperand *> Uses;
675 unsigned LastLivePos = RangeEnd;
677 // First we find all uses of this register with Position between RangeBegin
678 // and RangeEnd, any or all of these could be uses of a definition at
679 // RangeBegin. We also record the latest position a definition at RangeBegin
680 // would be considered live.
681 for (unsigned i = 0; i < MemOps.size(); ++i) {
682 MachineInstr &MI = *MemOps[i].MBBI;
683 unsigned MIPosition = MemOps[i].Position;
684 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
687 // If this instruction defines the register, then any later use will be of
688 // that definition rather than ours.
689 if (MI.definesRegister(DefReg))
690 LastLivePos = std::min(LastLivePos, MIPosition);
692 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
696 // If this instruction kills the register then (assuming liveness is
697 // correct when we start) we don't need to think about anything after here.
699 LastLivePos = std::min(LastLivePos, MIPosition);
701 Uses[MIPosition] = UseOp;
704 // Now we traverse the list of all uses, and append the ones that actually use
705 // our definition to the requested list.
706 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
709 // List is sorted by position so once we've found one out of range there
710 // will be no more to consider.
711 if (I->first > LastLivePos)
713 UsesOfImpDefs.push_back(I->second);
717 /// Call MergeOps and update MemOps and merges accordingly on success.
718 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
720 unsigned memOpsBegin, unsigned memOpsEnd,
721 unsigned insertAfter, int Offset,
722 unsigned Base, bool BaseKill,
724 ARMCC::CondCodes Pred, unsigned PredReg,
727 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
728 // First calculate which of the registers should be killed by the merged instruction.
730 const unsigned insertPos = memOps[insertAfter].Position;
731 SmallSet<unsigned, 4> KilledRegs;
732 DenseMap<unsigned, unsigned> Killer;
733 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
734 if (i == memOpsBegin) {
739 if (memOps[i].Position < insertPos && memOps[i].isKill) {
740 unsigned Reg = memOps[i].Reg;
741 KilledRegs.insert(Reg);
746 SmallVector<std::pair<unsigned, bool>, 8> Regs;
747 SmallVector<unsigned, 8> ImpDefs;
748 SmallVector<MachineOperand *, 8> UsesOfImpDefs;
749 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
750 unsigned Reg = memOps[i].Reg;
751 // If we are inserting the merged operation after an operation that
752 // uses the same register, make sure to transfer any kill flag.
753 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
754 Regs.push_back(std::make_pair(Reg, isKill));
756 // Collect any implicit defs of super-registers. They must be preserved.
757 for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
758 if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
760 unsigned DefReg = MO.getReg();
761 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
762 ImpDefs.push_back(DefReg);
764 // There may be other uses of the definition between this instruction and
765 // the eventual LDM/STM position. These should be marked undef if the
766 // merge takes place.
767 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, insertPos);
772 // Try to do the merge.
773 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
775 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
776 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
779 // Merge succeeded, update records.
780 Merges.push_back(std::prev(Loc));
782 // In gathering loads together, we may have moved the imp-def of a register
783 // past one of its uses. This is OK, since we know better than the rest of
784 // LLVM what's OK with ARM loads and stores; but we still have to adjust the affected uses and mark them undef.
786 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
787 E = UsesOfImpDefs.end(); I != E; ++I)
(*I)->setIsUndef();
791 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
792 // Remove kill flags from any memops that come before insertPos.
793 if (Regs[i-memOpsBegin].second) {
794 unsigned Reg = Regs[i-memOpsBegin].first;
795 if (KilledRegs.count(Reg)) {
796 unsigned j = Killer[Reg];
797 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
798 assert(Idx >= 0 && "Cannot find killing operand");
799 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
800 memOps[j].isKill = false;
802 memOps[i].isKill = true;
804 MBB.erase(memOps[i].MBBI);
805 // Update this memop to refer to the merged instruction.
806 // We may need to move kill flags again.
807 memOps[i].Merged = true;
808 memOps[i].MBBI = Merges.back();
809 memOps[i].Position = insertPos;
812 // Update memOps offsets, since they may have been modified by MergeOps.
813 for (auto &MemOp : memOps) {
814 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
818 /// Merge a number of load / store instructions into one or more load / store
819 /// multiple instructions.
821 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
822 unsigned Base, unsigned Opcode, unsigned Size,
823 ARMCC::CondCodes Pred, unsigned PredReg,
824 unsigned Scratch, MemOpQueue &MemOps,
825 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
826 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
827 int Offset = MemOps[SIndex].Offset;
828 int SOffset = Offset;
829 unsigned insertAfter = SIndex;
830 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
831 DebugLoc dl = Loc->getDebugLoc();
832 const MachineOperand &PMO = Loc->getOperand(0);
833 unsigned PReg = PMO.getReg();
834 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
836 unsigned Limit = ~0U;
837 bool BaseKill = false;
838 // vldm / vstm limits are 32 for S variants, 16 for D variants.
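// A minimal sketch of how Limit could be set per the comment above (assuming
// the usual VFP single/double load/store opcodes feed this path):
switch (Opcode) {
default: break;
case ARM::VLDRS: case ARM::VSTRS: Limit = 32; break; // S-register variants
case ARM::VLDRD: case ARM::VSTRD: Limit = 16; break; // D-register variants
}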
856 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
857 int NewOffset = MemOps[i].Offset;
858 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
859 unsigned Reg = MO.getReg();
860 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
861 // Register numbers must be in ascending order. For VFP / NEON load and
862 // store multiples, the registers must also be consecutive and within the
863 // limit on the number of registers per instruction.
864 if (Reg != ARM::SP &&
865 NewOffset == Offset + (int)Size &&
866 ((isNotVFP && RegNum > PRegNum) ||
867 ((Count < Limit) && RegNum == PRegNum+1)) &&
868 // On Swift we don't want vldm/vstm to start with an odd register number
869 // because unaligned Q-register vldm/vstm need more uops.
870 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
875 // Can't merge this one in. Try to merge the earlier ones first.
876 // We need to compute BaseKill here because the MemOps may have been changed by an earlier merge.
878 BaseKill = Loc->killsRegister(Base);
880 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
881 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
882 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
887 if (MemOps[i].Position > MemOps[insertAfter].Position) {
889 Loc = MemOps[i].MBBI;
893 BaseKill = Loc->killsRegister(Base);
894 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
895 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
898 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
899 unsigned Bytes, unsigned Limit,
900 ARMCC::CondCodes Pred, unsigned PredReg) {
901 unsigned MyPredReg = 0;
905 bool CheckCPSRDef = false;
906 switch (MI->getOpcode()) {
907 default: return false;
917 // Make sure the offset fits in 8 bits.
918 if (Bytes == 0 || (Limit && Bytes >= Limit))
921 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
922 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
923 if (!(MI->getOperand(0).getReg() == Base &&
924 MI->getOperand(1).getReg() == Base &&
925 (MI->getOperand(2).getImm() * Scale) == Bytes &&
926 getInstrPredicate(MI, MyPredReg) == Pred &&
927 MyPredReg == PredReg))
930 return CheckCPSRDef ? !definesCPSR(MI) : true;
933 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
934 unsigned Bytes, unsigned Limit,
935 ARMCC::CondCodes Pred, unsigned PredReg) {
936 unsigned MyPredReg = 0;
940 bool CheckCPSRDef = false;
941 switch (MI->getOpcode()) {
942 default: return false;
952 if (Bytes == 0 || (Limit && Bytes >= Limit))
953 // Make sure the offset fits in 8 bits.
956 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
957 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
958 if (!(MI->getOperand(0).getReg() == Base &&
959 MI->getOperand(1).getReg() == Base &&
960 (MI->getOperand(2).getImm() * Scale) == Bytes &&
961 getInstrPredicate(MI, MyPredReg) == Pred &&
962 MyPredReg == PredReg))
965 return CheckCPSRDef ? !definesCPSR(MI) : true;
968 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
969 switch (MI->getOpcode()) {
996 case ARM::tLDMIA_UPD:
997 case ARM::tSTMIA_UPD:
1004 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
1007 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
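// i.e. 4 bytes per register in the variadic transfer list for the integer and
// S-register forms above, and 8 bytes per register for the 64-bit D-register forms.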
1011 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
1012 ARM_AM::AMSubMode Mode) {
1014 default: llvm_unreachable("Unhandled opcode!");
1020 default: llvm_unreachable("Unhandled submode!");
1021 case ARM_AM::ia: return ARM::LDMIA_UPD;
1022 case ARM_AM::ib: return ARM::LDMIB_UPD;
1023 case ARM_AM::da: return ARM::LDMDA_UPD;
1024 case ARM_AM::db: return ARM::LDMDB_UPD;
1031 default: llvm_unreachable("Unhandled submode!");
1032 case ARM_AM::ia: return ARM::STMIA_UPD;
1033 case ARM_AM::ib: return ARM::STMIB_UPD;
1034 case ARM_AM::da: return ARM::STMDA_UPD;
1035 case ARM_AM::db: return ARM::STMDB_UPD;
1040 default: llvm_unreachable("Unhandled submode!");
1041 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1042 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1047 default: llvm_unreachable("Unhandled submode!");
1048 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1049 case ARM_AM::db: return ARM::t2STMDB_UPD;
1053 default: llvm_unreachable("Unhandled submode!");
1054 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1055 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1059 default: llvm_unreachable("Unhandled submode!");
1060 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1061 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1065 default: llvm_unreachable("Unhandled submode!");
1066 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1067 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1071 default: llvm_unreachable("Unhandled submode!");
1072 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1073 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1078 /// Fold preceding/trailing inc/dec of base register into the
1079 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1081 /// stmia rn, <ra, rb, rc>
1082 /// rn := rn + 4 * 3;
1084 /// stmia rn!, <ra, rb, rc>
1086 /// rn := rn - 4 * 3;
1087 /// ldmia rn, <ra, rb, rc>
1089 /// ldmdb rn!, <ra, rb, rc>
1090 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
1091 MachineBasicBlock::iterator MBBI,
1093 MachineBasicBlock::iterator &I) {
1094 // Thumb1 is already using updating loads/stores.
1095 if (isThumb1) return false;
1097 MachineInstr *MI = MBBI;
1098 unsigned Base = MI->getOperand(0).getReg();
1099 bool BaseKill = MI->getOperand(0).isKill();
1100 unsigned Bytes = getLSMultipleTransferSize(MI);
1101 unsigned PredReg = 0;
1102 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1103 unsigned Opcode = MI->getOpcode();
1104 DebugLoc dl = MI->getDebugLoc();
1106 // Can't use an updating ld/st if the base register is also a dest
1107 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1108 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1109 if (MI->getOperand(i).getReg() == Base)
1112 bool DoMerge = false;
1113 ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
1115 // Try merging with the previous instruction.
1116 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1117 if (MBBI != BeginMBBI) {
1118 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1119 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1121 if (Mode == ARM_AM::ia &&
1122 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1125 } else if (Mode == ARM_AM::ib &&
1126 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1131 MBB.erase(PrevMBBI);
1134 // Try merging with the next instruction.
1135 MachineBasicBlock::iterator EndMBBI = MBB.end();
1136 if (!DoMerge && MBBI != EndMBBI) {
1137 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1138 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1140 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
1141 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1143 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
1144 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1148 if (NextMBBI == I) {
1152 MBB.erase(NextMBBI);
1159 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1160 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1161 .addReg(Base, getDefRegState(true)) // WB base register
1162 .addReg(Base, getKillRegState(BaseKill))
1163 .addImm(Pred).addReg(PredReg);
1165 // Transfer the rest of operands.
1166 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1167 MIB.addOperand(MI->getOperand(OpNum));
1169 // Transfer memoperands.
1170 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1176 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1177 ARM_AM::AddrOpc Mode) {
1180 return ARM::LDR_PRE_IMM;
1182 return ARM::STR_PRE_IMM;
1184 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1186 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1188 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1190 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1193 return ARM::t2LDR_PRE;
1196 return ARM::t2STR_PRE;
1197 default: llvm_unreachable("Unhandled opcode!");
1201 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1202 ARM_AM::AddrOpc Mode) {
1205 return ARM::LDR_POST_IMM;
1207 return ARM::STR_POST_IMM;
1209 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1211 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1213 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1215 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1218 return ARM::t2LDR_POST;
1221 return ARM::t2STR_POST;
1222 default: llvm_unreachable("Unhandled opcode!");
1226 /// Fold preceding/trailing inc/dec of base register into the
1227 /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
1228 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
1229 MachineBasicBlock::iterator MBBI,
1230 const TargetInstrInfo *TII,
1232 MachineBasicBlock::iterator &I) {
1233 // Thumb1 doesn't have updating LDR/STR.
1234 // FIXME: Use LDM/STM with single register instead.
1235 if (isThumb1) return false;
1237 MachineInstr *MI = MBBI;
1238 unsigned Base = MI->getOperand(1).getReg();
1239 bool BaseKill = MI->getOperand(1).isKill();
1240 unsigned Bytes = getLSMultipleTransferSize(MI);
1241 unsigned Opcode = MI->getOpcode();
1242 DebugLoc dl = MI->getDebugLoc();
1243 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1244 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1245 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1246 if (isi32Load(Opcode) || isi32Store(Opcode))
1247 if (MI->getOperand(2).getImm() != 0)
1249 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1252 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
1253 // Can't do the merge if the destination register is the same as the would-be
1254 // writeback register.
1255 if (MI->getOperand(0).getReg() == Base)
1258 unsigned PredReg = 0;
1259 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1260 bool DoMerge = false;
1261 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1262 unsigned NewOpc = 0;
1263 // AM2 - 12 bits, thumb2 - 8 bits.
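// (so the matched increment/decrement may be at most 4095 bytes for AM2 and
// 255 bytes for Thumb2; for AM5 a Limit of 0 skips the byte-limit check.)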
1264 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
1266 // Try merging with the previous instruction.
1267 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1268 if (MBBI != BeginMBBI) {
1269 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1270 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1272 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1274 AddSub = ARM_AM::sub;
1275 } else if (!isAM5 &&
1276 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1280 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
1281 MBB.erase(PrevMBBI);
1285 // Try merging with the next instruction.
1286 MachineBasicBlock::iterator EndMBBI = MBB.end();
1287 if (!DoMerge && MBBI != EndMBBI) {
1288 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1289 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1292 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1294 AddSub = ARM_AM::sub;
1295 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1299 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
1300 if (NextMBBI == I) {
1304 MBB.erase(NextMBBI);
1312 // VLDM[SD]_UPD, VSTM[SD]_UPD
1313 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1314 // updating load/store-multiple instructions can be used with only one register.)
1316 MachineOperand &MO = MI->getOperand(0);
1317 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1318 .addReg(Base, getDefRegState(true)) // WB base register
1319 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1320 .addImm(Pred).addReg(PredReg)
1321 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1322 getKillRegState(MO.isKill())));
1325 // LDR_PRE, LDR_POST
1326 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1327 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1328 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1329 .addReg(Base, RegState::Define)
1330 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1332 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1333 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1334 .addReg(Base, RegState::Define)
1335 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1338 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1339 // t2LDR_PRE, t2LDR_POST
1340 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1341 .addReg(Base, RegState::Define)
1342 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1345 MachineOperand &MO = MI->getOperand(0);
1346 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1347 // the vestigial zero-reg offset register. When that's fixed, this clause
1348 // can be removed entirely.
1349 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1350 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1351 // STR_PRE, STR_POST
1352 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1353 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1354 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1356 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1357 // t2STR_PRE, t2STR_POST
1358 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1359 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1360 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1368 /// Returns true if instruction is a memory operation that this pass is capable
1369 /// of operating on.
1370 static bool isMemoryOp(const MachineInstr *MI) {
1371 // When no memory operands are present, conservatively assume unaligned,
1372 // volatile, unfoldable.
1373 if (!MI->hasOneMemOperand())
1376 const MachineMemOperand *MMO = *MI->memoperands_begin();
1378 // Don't touch volatile memory accesses - we may be changing their order.
1379 if (MMO->isVolatile())
1382 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is not.
1384 if (MMO->getAlignment() < 4)
1387 // str <undef> could probably be eliminated entirely, but for now we just want
1388 // to avoid making a mess of it.
1389 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1390 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1391 MI->getOperand(0).isUndef())
1394 // Likewise don't mess with references to undefined addresses.
1395 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1396 MI->getOperand(1).isUndef())
1399 unsigned Opcode = MI->getOpcode();
1404 return MI->getOperand(1).isReg();
1407 return MI->getOperand(1).isReg();
1418 return MI->getOperand(1).isReg();
1423 /// Advance the register scavenger to just before the earliest memory op that is about to be merged.
1425 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1426 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1427 unsigned Position = MemOps[0].Position;
1428 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1429 if (MemOps[i].Position < Position) {
1430 Position = MemOps[i].Position;
1431 Loc = MemOps[i].MBBI;
1435 if (Loc != MBB.begin())
1436 RS->forward(std::prev(Loc));
1439 static void InsertLDR_STR(MachineBasicBlock &MBB,
1440 MachineBasicBlock::iterator &MBBI,
1441 int Offset, bool isDef,
1442 DebugLoc dl, unsigned NewOpc,
1443 unsigned Reg, bool RegDeadKill, bool RegUndef,
1444 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1445 bool OffKill, bool OffUndef,
1446 ARMCC::CondCodes Pred, unsigned PredReg,
1447 const TargetInstrInfo *TII, bool isT2) {
1449 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1451 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1452 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1453 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1455 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1457 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1458 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1459 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1463 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1464 MachineBasicBlock::iterator &MBBI) {
1465 MachineInstr *MI = &*MBBI;
1466 unsigned Opcode = MI->getOpcode();
1467 if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
1470 const MachineOperand &BaseOp = MI->getOperand(2);
1471 unsigned BaseReg = BaseOp.getReg();
1472 unsigned EvenReg = MI->getOperand(0).getReg();
1473 unsigned OddReg = MI->getOperand(1).getReg();
1474 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1475 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1477 // ARM errata 602117: LDRD with base in list may result in incorrect base
1478 // register when interrupted or faulted.
1479 bool Errata602117 = EvenReg == BaseReg &&
1480 (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
1481 // ARM LDRD/STRD needs consecutive registers.
1482 bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
1483 (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
1485 if (!Errata602117 && !NonConsecutiveRegs)
1488 MachineBasicBlock::iterator NewBBI = MBBI;
1489 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1490 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1491 bool EvenDeadKill = isLd ?
1492 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1493 bool EvenUndef = MI->getOperand(0).isUndef();
1494 bool OddDeadKill = isLd ?
1495 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1496 bool OddUndef = MI->getOperand(1).isUndef();
1497 bool BaseKill = BaseOp.isKill();
1498 bool BaseUndef = BaseOp.isUndef();
1499 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1500 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1501 int OffImm = getMemoryOpOffset(MI);
1502 unsigned PredReg = 0;
1503 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1505 if (OddRegNum > EvenRegNum && OffImm == 0) {
1506 // Ascending register numbers and no offset. It's safe to change it to an ldm or stm.
1508 unsigned NewOpc = (isLd)
1509 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1510 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1512 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1513 .addReg(BaseReg, getKillRegState(BaseKill))
1514 .addImm(Pred).addReg(PredReg)
1515 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1516 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1519 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1520 .addReg(BaseReg, getKillRegState(BaseKill))
1521 .addImm(Pred).addReg(PredReg)
1523 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1525 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1528 NewBBI = std::prev(MBBI);
1530 // Split into two instructions.
1531 unsigned NewOpc = (isLd)
1532 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1533 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1534 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1535 // so adjust and use t2LDRi12 here for that.
1536 unsigned NewOpc2 = (isLd)
1537 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1538 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1539 DebugLoc dl = MBBI->getDebugLoc();
1540 // If this is a load and the base register is killed, it may have been
1541 // re-defed by the load; make sure the first load does not clobber it.
1543 (BaseKill || OffKill) &&
1544 (TRI->regsOverlap(EvenReg, BaseReg))) {
1545 assert(!TRI->regsOverlap(OddReg, BaseReg));
1546 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1547 OddReg, OddDeadKill, false,
1548 BaseReg, false, BaseUndef, false, OffUndef,
1549 Pred, PredReg, TII, isT2);
1550 NewBBI = std::prev(MBBI);
1551 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1552 EvenReg, EvenDeadKill, false,
1553 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1554 Pred, PredReg, TII, isT2);
1556 if (OddReg == EvenReg && EvenDeadKill) {
1557 // If the two source operands are the same, the kill marker is
1558 // probably on the first one. e.g.
1559 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1560 EvenDeadKill = false;
1563 // Never kill the base register in the first instruction.
1564 if (EvenReg == BaseReg)
1565 EvenDeadKill = false;
1566 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1567 EvenReg, EvenDeadKill, EvenUndef,
1568 BaseReg, false, BaseUndef, false, OffUndef,
1569 Pred, PredReg, TII, isT2);
1570 NewBBI = std::prev(MBBI);
1571 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1572 OddReg, OddDeadKill, OddUndef,
1573 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1574 Pred, PredReg, TII, isT2);
1587 /// An optimization pass to turn multiple LDR / STR ops of the same base and
1588 /// incrementing offset into LDM / STM ops.
1589 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1590 unsigned NumMerges = 0;
1591 unsigned NumMemOps = 0;
1593 unsigned CurrBase = 0;
1594 unsigned CurrOpc = ~0u;
1595 unsigned CurrSize = 0;
1596 ARMCC::CondCodes CurrPred = ARMCC::AL;
1597 unsigned CurrPredReg = 0;
1598 unsigned Position = 0;
1599 SmallVector<MachineBasicBlock::iterator,4> Merges;
1601 RS->enterBasicBlock(&MBB);
1602 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1604 if (FixInvalidRegPairOp(MBB, MBBI))
1607 bool Advance = false;
1608 bool TryMerge = false;
1610 bool isMemOp = isMemoryOp(MBBI);
1612 unsigned Opcode = MBBI->getOpcode();
1613 unsigned Size = getLSMultipleTransferSize(MBBI);
1614 const MachineOperand &MO = MBBI->getOperand(0);
1615 unsigned Reg = MO.getReg();
1616 bool isKill = MO.isDef() ? false : MO.isKill();
1617 unsigned Base = MBBI->getOperand(1).getReg();
1618 unsigned PredReg = 0;
1619 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1620 int Offset = getMemoryOpOffset(MBBI);
1623 // r5 := ldr [r5, #4]
1624 // r6 := ldr [r5, #8]
1626 // The first of these ldr's clobbers the base register, effectively breaking
1627 // the chain even though it looks like the later ldr(s) use the same base
1628 // register. Try to merge the ldr's so far, including this one. But don't try
1629 // to combine the following ldr(s).
1630 bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();
1633 // r4 := ldr [r0, #8]
1634 // r4 := ldr [r0, #4]
1636 // The optimization may reorder the second ldr in front of the first
1637 // ldr, which violates write after write(WAW) dependence. The same as
1638 // str. Try to merge inst(s) already in MemOps.
1639 bool Overlap = false;
1640 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1641 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1647 if (CurrBase == 0 && !Clobber) {
1648 // Start of a new chain.
1653 CurrPredReg = PredReg;
1654 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1657 } else if (!Overlap) {
1663 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1664 // No need to match PredReg.
1665 // Continue adding to the queue.
1666 if (Offset > MemOps.back().Offset) {
1667 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1672 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1674 if (Offset < I->Offset) {
1675 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1680 } else if (Offset == I->Offset) {
1681 // Collision! This can't be merged!
1690 if (MBBI->isDebugValue()) {
1693 // Reached the end of the block; try merging the memory instructions.
1695 } else if (Advance) {
1699 // Reached the end of the block; try merging the memory instructions.
1706 if (NumMemOps > 1) {
1707 // Try to find a free register to use as a new base in case it's needed.
1708 // First advance to the instruction just before the start of the chain.
1709 AdvanceRS(MBB, MemOps);
1711 // Find a scratch register.
1713 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
1715 // Process the load / store instructions.
1716 RS->forward(std::prev(MBBI));
1720 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1721 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1723 // Try folding preceding/trailing base inc/dec into the generated
1725 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1726 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1728 NumMerges += Merges.size();
1730 // Try folding preceding/trailing base inc/dec into those load/store
1731 // that were not merged to form LDM/STM ops.
1732 for (unsigned i = 0; i != NumMemOps; ++i)
1733 if (!MemOps[i].Merged)
1734 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1737 // RS may be pointing to an instruction that's deleted.
1738 RS->skipTo(std::prev(MBBI));
1739 } else if (NumMemOps == 1) {
1740 // Try folding preceding/trailing base inc/dec into the single
1742 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1744 RS->forward(std::prev(MBBI));
1751 CurrPred = ARMCC::AL;
1758 // If iterator hasn't been advanced and this is not a memory op, skip it.
1759 // It can't start a new chain anyway.
1760 if (!Advance && !isMemOp && MBBI != E) {
1766 return NumMerges > 0;
1769 /// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
1770 /// into the preceding stack restore so it directly restores the value of LR into pc:
1772 ///   ldmfd sp!, {..., lr} ; bx lr
1775 /// or ldmfd sp!, {..., lr} ; mov pc, lr
1778 /// => ldmfd sp!, {..., pc}
1779 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1780 // Thumb1 LDM doesn't allow high registers.
1781 if (isThumb1) return false;
1782 if (MBB.empty()) return false;
1784 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1785 if (MBBI != MBB.begin() &&
1786 (MBBI->getOpcode() == ARM::BX_RET ||
1787 MBBI->getOpcode() == ARM::tBX_RET ||
1788 MBBI->getOpcode() == ARM::MOVPCLR)) {
1789 MachineInstr *PrevMI = std::prev(MBBI);
1790 unsigned Opcode = PrevMI->getOpcode();
1791 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1792 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1793 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1794 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1795 if (MO.getReg() != ARM::LR)
1797 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1798 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1799 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1800 PrevMI->setDesc(TII->get(NewOpc));
1802 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
1810 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1811 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1812 TL = STI->getTargetLowering();
1813 AFI = Fn.getInfo<ARMFunctionInfo>();
1814 TII = STI->getInstrInfo();
1815 TRI = STI->getRegisterInfo();
1816 RS = new RegScavenger();
1817 isThumb2 = AFI->isThumb2Function();
1818 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1820 bool Modified = false;
1821 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1823 MachineBasicBlock &MBB = *MFI;
1824 Modified |= LoadStoreMultipleOpti(MBB);
1825 if (STI->hasV5TOps())
1826 Modified |= MergeReturnIntoLDM(MBB);
1834 /// Pre-register-allocation pass that moves loads / stores from consecutive
1835 /// locations closer together to make it more likely they will be combined later.
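///
/// For example (hypothetical), rescheduling
///   ldr r1, [r0]
///   add r3, r3, #4
///   ldr r2, [r0, #4]
/// so that the two loads become adjacent makes a later ldrd / ldm formation
/// possible, provided the intervening instruction does not conflict.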
1836 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1838 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1840 const DataLayout *TD;
1841 const TargetInstrInfo *TII;
1842 const TargetRegisterInfo *TRI;
1843 const ARMSubtarget *STI;
1844 MachineRegisterInfo *MRI;
1845 MachineFunction *MF;
1847 bool runOnMachineFunction(MachineFunction &Fn) override;
1849 const char *getPassName() const override {
1850 return "ARM pre- register allocation load / store optimization pass";
1854 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1855 unsigned &NewOpc, unsigned &EvenReg,
1856 unsigned &OddReg, unsigned &BaseReg,
1858 unsigned &PredReg, ARMCC::CondCodes &Pred,
1860 bool RescheduleOps(MachineBasicBlock *MBB,
1861 SmallVectorImpl<MachineInstr *> &Ops,
1862 unsigned Base, bool isLd,
1863 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1864 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1866 char ARMPreAllocLoadStoreOpt::ID = 0;
1869 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1870 TD = Fn.getTarget().getDataLayout();
1871 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1872 TII = STI->getInstrInfo();
1873 TRI = STI->getRegisterInfo();
1874 MRI = &Fn.getRegInfo();
1877 bool Modified = false;
1878 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1880 Modified |= RescheduleLoadStoreInstrs(MFI);
1885 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1886 MachineBasicBlock::iterator I,
1887 MachineBasicBlock::iterator E,
1888 SmallPtrSetImpl<MachineInstr*> &MemOps,
1889 SmallSet<unsigned, 4> &MemRegs,
1890 const TargetRegisterInfo *TRI) {
1891 // Are there stores / loads / calls between them?
1892 // FIXME: This is overly conservative. We should make use of alias information where available.
1894 SmallSet<unsigned, 4> AddedRegPressure;
1896 if (I->isDebugValue() || MemOps.count(&*I))
1898 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1900 if (isLd && I->mayStore())
1905 // It's not safe to move the first 'str' down past an intervening store, e.g.
1908 // str r4, [r0, #+4]
1912 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1913 MachineOperand &MO = I->getOperand(j);
1916 unsigned Reg = MO.getReg();
1917 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1919 if (Reg != Base && !MemRegs.count(Reg))
1920 AddedRegPressure.insert(Reg);
1924 // Estimate register pressure increase due to the transformation.
1925 if (MemRegs.size() <= 4)
1926 // OK if we are moving a small number of instructions.
1928 return AddedRegPressure.size() <= MemRegs.size() * 2;
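// Heuristic: for example, when six memops are being moved (MemRegs.size() == 6),
// tolerate at most twelve other registers referenced by the instructions being hoisted over.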
1932 /// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
1933 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1934 MachineInstr *Op1) {
1935 assert(MI->memoperands_empty() && "expected a new machineinstr");
1936 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1937 + (Op1->memoperands_end() - Op1->memoperands_begin());
1939 MachineFunction *MF = MI->getParent()->getParent();
1940 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1941 MachineSDNode::mmo_iterator MemEnd =
1942 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1944 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1945 MI->setMemRefs(MemBegin, MemEnd);
1949 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1950 DebugLoc &dl, unsigned &NewOpc,
1952 unsigned &SecondReg,
1953 unsigned &BaseReg, int &Offset,
1955 ARMCC::CondCodes &Pred,
1957 // Make sure we're allowed to generate LDRD/STRD.
1958 if (!STI->hasV5TEOps())
1961 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1963 unsigned Opcode = Op0->getOpcode();
1964 if (Opcode == ARM::LDRi12) {
1966 } else if (Opcode == ARM::STRi12) {
1968 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1969 NewOpc = ARM::t2LDRDi8;
1972 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1973 NewOpc = ARM::t2STRDi8;
1980 // Make sure the base address satisfies the i64 ld / st alignment requirement.
1981 // At the moment, we ignore the memory operand's value.
1982 // If we want to use AliasAnalysis, we should check it accordingly.
1983 if (!Op0->hasOneMemOperand() ||
1984 (*Op0->memoperands_begin())->isVolatile())
1987 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1988 const Function *Func = MF->getFunction();
1989 unsigned ReqAlign = STI->hasV6Ops()
1990 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1991 : 8; // Pre-v6 need 8-byte align
1992 if (Align < ReqAlign)
1995 // Then make sure the immediate offset fits.
1996 int OffImm = getMemoryOpOffset(Op0);
1998 int Limit = (1 << 8) * Scale;
1999 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2003 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2005 AddSub = ARM_AM::sub;
2008 int Limit = (1 << 8) * Scale;
2009 if (OffImm >= Limit || (OffImm & (Scale-1)))
2011 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2013 FirstReg = Op0->getOperand(0).getReg();
2014 SecondReg = Op1->getOperand(0).getReg();
2015 if (FirstReg == SecondReg)
2017 BaseReg = Op0->getOperand(1).getReg();
2018 Pred = getInstrPredicate(Op0, PredReg);
2019 dl = Op0->getDebugLoc();
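
/// Try to move the loads / stores in \p Ops, all of which access the same
/// \p Base register, next to each other so a later pass can merge them into a
/// load / store multiple. When exactly two operations are moved and
/// CanFormLdStDWord succeeds, the pair is rewritten into an LDRD / STRD on
/// the spot; for the ARM-mode encodings, register-allocation hints are added
/// so the allocator can pick an even / odd register pair.
///
/// For example (illustrative only), in
///   ldr r2, [r0]
///   add r5, r5, #1
///   ldr r3, [r0, #4]
/// the add neither accesses memory nor redefines r0, so the two loads can be
/// placed next to each other and then be turned into an ldrd here, or left
/// adjacent for the post-allocation pass to merge into an ldm.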
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });
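
  // Ops is now sorted by decreasing offset, so walking it from back to front
  // below visits the accesses in increasing-offset order.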

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

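    // NumMove now counts how many accesses at the back of Ops form a run of
    // same-width transfers at consecutive, increasing offsets; FirstOp and
    // LastOp track the earliest and latest of the scanned ops in block order.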
    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;
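
        // InsertPos now points just past the group (and any interleaved debug
        // values); merged or moved instructions are inserted before it.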
        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
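
/// Collect the loads and stores of a basic block, bucketed by base register,
/// and hand every bucket with more than one entry to RescheduleOps. Scanning
/// stops and restarts at barriers (calls and terminators) and whenever a
/// base+offset combination repeats.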
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
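
// ARMPassConfig (ARMTargetMachine.cpp) is expected to instantiate the
// pre-register-allocation variant from addPreRegAlloc() and the
// post-allocation variant later in the pipeline; PreAlloc selects between
// the two.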