//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <map>

using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed, "Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed, "Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
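///
/// For example (illustrative only, not from the original source), three word
/// loads off the same base register
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// can be rewritten as a single load multiple:
///   ldmia r4, {r0, r1, r2}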
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &memOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
    : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}
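// For example (illustrative only, not from the original source): a tLDRi
// whose immediate operand is 3 denotes a byte offset of 3 * 4 = 12, while a
// t2LDRi12 with immediate 12 denotes a byte offset of 12 directly.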
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
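///
/// For example (illustrative only, not from the original source), after
/// folding writeback into an LDM:
///   ldmia r0!, {r2, r3}
///   ldr   r4, [r0, #8]    ; r0 has advanced by 8, so this use must be
///                         ; rewritten to ldr r4, [r0, #0]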
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = MBBI->getOperand(0).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
        .addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
      .addImm(Pred).addReg(PredReg);
  }
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
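///
/// For example (illustrative only, not from the original source), with
/// Base = r4 and Regs = {(r0, kill), (r1, kill), (r2, kill)} a store merge
/// emits:
///   stmia r4, {r0, r1, r2}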
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1)
    for (unsigned I = 0; I < NumRegs; ++I)
      if (Base == Regs[I].first) {
        if (Opcode == ARM::tLDRi) {
          Writeback = false;
          break;
        } else if (Opcode == ARM::tSTRi) {
          return false;
        }
      }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase && Offset >= 8) {
        const ARMSubtarget &Subtarget = MBB.getParent()->getTarget()
          .getSubtarget<ARMSubtarget>();
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !Subtarget.hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return false;
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(BaseKill));
        } else
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(BaseKill))
            .addImm(Pred).addReg(PredReg);

        // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
        Base = NewBase;
        BaseKill = false;
      }
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg);
    } else
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);

    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return false;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}
/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually use
  // our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // kill flags.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }

  // Update memOps offsets, since they may have been modified by MergeOps.
  for (auto &MemOp : memOps) {
    MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
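///
/// For example (illustrative only, not from the original source), a queue at
/// offsets {0, 4, 12, 16} has a gap after #4, so the run is split and each
/// half is merged separately:
///   ldmia r0, {r1, r2}   ; covers offsets 0 and 4
///   ...                  ; offsets 12 and 16 handled by a recursive call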
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;
  bool BaseKill = false;
  // vldm / vstm limit are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register num
        // because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);

      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
                     BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position) {
      insertAfter = i;
      Loc = MemOps[i].MBBI;
    }
  }

  BaseKill = Loc->killsRegister(Base);
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
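///
/// For example (illustrative only, not from the original source), a
/// post-increment fold:
///   ldr r1, [rn]
///   rn := rn + 4
/// =>
///   ldr r1, [rn], #4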
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
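///
/// For example (illustrative only, not from the original source):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]   =>   ldmia r0, {r1, r2, r3}
///   ldr r3, [r0, #8]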
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write after write(WAW) dependence. The same as
      // str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so that it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  TL = TM.getSubtargetImpl()->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getSubtargetImpl()->getInstrInfo();
  TRI = TM.getSubtargetImpl()->getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
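///
/// For example (illustrative only, not from the original source), hoisting
/// the second load next to the first lets the post-RA pass form an ldm (or an
/// ldrd):
///   ldr r2, [r0]              ldr r2, [r0]
///   add r3, r3, #1       =>   ldr r4, [r0, #4]
///   ldr r4, [r0, #4]          add r3, r3, #1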
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre-register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  TII = Fn.getSubtarget().getInstrInfo();
  TRI = Fn.getSubtarget().getRegisterInfo();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
1943 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1945 unsigned &NewOpc, unsigned &EvenReg,
1946 unsigned &OddReg, unsigned &BaseReg,
1947 int &Offset, unsigned &PredReg,
1948 ARMCC::CondCodes &Pred,
1950 // Make sure we're allowed to generate LDRD/STRD.
1951 if (!STI->hasV5TEOps())
1954 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1956 unsigned Opcode = Op0->getOpcode();
1957 if (Opcode == ARM::LDRi12) {
1959 } else if (Opcode == ARM::STRi12) {
1961 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1962 NewOpc = ARM::t2LDRDi8;
1965 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1966 NewOpc = ARM::t2STRDi8;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8; // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
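  // The checks above imply: t2LDRDi8/t2STRDi8 (Scale == 4) take word-aligned
  // offsets in (-1024, 1024), while ARM LDRD/STRD use addrmode3, whose 8-bit
  // magnitude plus add/sub bit covers offsets in (-256, 256).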

  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });
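  // Descending order puts the lowest-offset access at Ops.back(), so the
  // loops below can walk the vector from the back in increasing-offset order
  // and pop handled instructions cheaply.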

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;
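
        // Loads are hoisted to the first access's position and stores sunk
        // to the last one's, so none of the merged accesses moves past an
        // instruction it has not already been checked against.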

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
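        // Pairing is only attempted for runs of exactly two accesses; longer
        // runs are simply spliced next to each other below and left for the
        // post-RA pass to merge into LDM/STM.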
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);
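
          // ARM-mode LDRD/STRD needs an even/odd register pair (e.g. r0/r1);
          // the hints below let the register allocator honor that whenever
          // possible.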
          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }
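
      // Number each instruction; the distance between two numbers is later
      // used as a cheap proxy for how far apart two memory accesses are.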
      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack: process what has been collected so far, then restart
        // the scan from this instruction.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
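
// A minimal sketch of how a target pipeline might schedule the two variants.
// The hook names below follow ARMPassConfig in ARMTargetMachine.cpp, but
// treat this as an illustrative assumption rather than a copy of that file:
//
//   void ARMPassConfig::addPreRegAlloc() {
//     // Pre-RA variant: reschedules loads/stores and may form LDRD/STRD.
//     if (getOptLevel() != CodeGenOpt::None)
//       addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));
//   }
//
//   void ARMPassConfig::addPreSched2() {
//     // Post-RA variant: merges adjacent loads/stores into LDM/STM.
//     if (getOptLevel() != CodeGenOpt::None)
//       addPass(createARMLoadStoreOptimizationPass());
//   }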