1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "Thumb1RegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterScavenging.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Target/TargetInstrInfo.h"
42 #include "llvm/Target/TargetMachine.h"
43 #include "llvm/Target/TargetRegisterInfo.h"
46 #define DEBUG_TYPE "arm-ldst-opt"
48 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
49 STATISTIC(NumSTMGened , "Number of stm instructions generated");
50 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
51 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
52 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
53 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
54 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
55 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
56 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
57 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
58 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
60 /// ARMAllocLoadStoreOpt - Post-register allocation pass that combines
61 /// load / store instructions to form ldm / stm instructions.
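///
/// For example (an illustrative sketch, not from the original comments; the
/// register choices are assumptions): two adjacent word loads from
/// consecutive addresses
///   ldr r0, [r3]
///   ldr r1, [r3, #4]
/// can be rewritten by this pass as a single
///   ldmia r3, {r0, r1}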
64 struct ARMLoadStoreOpt : public MachineFunctionPass {
65 static char ID;
66 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
68 const TargetInstrInfo *TII;
69 const TargetRegisterInfo *TRI;
70 const ARMSubtarget *STI;
71 const TargetLowering *TL;
74 bool isThumb1, isThumb2;
76 bool runOnMachineFunction(MachineFunction &Fn) override;
78 const char *getPassName() const override {
79 return "ARM load / store optimization pass";
83 struct MemOpQueueEntry {
84 int Offset;
85 unsigned Reg;
86 bool isKill;
87 unsigned Position;
88 MachineBasicBlock::iterator MBBI;
89 bool Merged;
90 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
91 MachineBasicBlock::iterator i)
92 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
94 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
95 typedef MemOpQueue::iterator MemOpQueueIter;
97 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
98 const MemOpQueue &MemOps, unsigned DefReg,
99 unsigned RangeBegin, unsigned RangeEnd);
100 void UpdateBaseRegUses(MachineBasicBlock &MBB,
101 MachineBasicBlock::iterator MBBI,
102 DebugLoc dl, unsigned Base, unsigned WordOffset,
103 ARMCC::CondCodes Pred, unsigned PredReg);
104 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
105 int Offset, unsigned Base, bool BaseKill, int Opcode,
106 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
108 ArrayRef<std::pair<unsigned, bool> > Regs,
109 ArrayRef<unsigned> ImpDefs);
110 void MergeOpsUpdate(MachineBasicBlock &MBB,
112 unsigned memOpsBegin,
114 unsigned insertAfter,
119 ARMCC::CondCodes Pred,
123 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
124 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
125 int Opcode, unsigned Size,
126 ARMCC::CondCodes Pred, unsigned PredReg,
127 unsigned Scratch, MemOpQueue &MemOps,
128 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
129 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
130 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
131 MachineBasicBlock::iterator &MBBI);
132 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
133 MachineBasicBlock::iterator MBBI,
134 const TargetInstrInfo *TII,
136 MachineBasicBlock::iterator &I);
137 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
138 MachineBasicBlock::iterator MBBI,
140 MachineBasicBlock::iterator &I);
141 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
142 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
144 char ARMLoadStoreOpt::ID = 0;
147 static bool definesCPSR(const MachineInstr *MI) {
148 for (const auto &MO : MI->operands()) {
151 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
152 // If the instruction has a live CPSR def, then it's not safe to fold it
153 // into load / store.
160 static int getMemoryOpOffset(const MachineInstr *MI) {
161 int Opcode = MI->getOpcode();
162 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
163 unsigned NumOperands = MI->getDesc().getNumOperands();
164 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
166 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
167 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
168 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
169 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
172 // Thumb1 immediate offsets are scaled by 4
173 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
176 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
177 : ARM_AM::getAM5Offset(OffField) * 4;
178 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
179 : ARM_AM::getAM5Op(OffField);
181 if (Op == ARM_AM::sub)
187 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
189 default: llvm_unreachable("Unhandled opcode!");
193 default: llvm_unreachable("Unhandled submode!");
194 case ARM_AM::ia: return ARM::LDMIA;
195 case ARM_AM::da: return ARM::LDMDA;
196 case ARM_AM::db: return ARM::LDMDB;
197 case ARM_AM::ib: return ARM::LDMIB;
202 default: llvm_unreachable("Unhandled submode!");
203 case ARM_AM::ia: return ARM::STMIA;
204 case ARM_AM::da: return ARM::STMDA;
205 case ARM_AM::db: return ARM::STMDB;
206 case ARM_AM::ib: return ARM::STMIB;
209 // tLDMIA is writeback-only - unless the base register is in the input
210 // reglist.
213 default: llvm_unreachable("Unhandled submode!");
214 case ARM_AM::ia: return ARM::tLDMIA;
217 // There is no non-writeback tSTMIA either.
220 default: llvm_unreachable("Unhandled submode!");
221 case ARM_AM::ia: return ARM::tSTMIA_UPD;
227 default: llvm_unreachable("Unhandled submode!");
228 case ARM_AM::ia: return ARM::t2LDMIA;
229 case ARM_AM::db: return ARM::t2LDMDB;
235 default: llvm_unreachable("Unhandled submode!");
236 case ARM_AM::ia: return ARM::t2STMIA;
237 case ARM_AM::db: return ARM::t2STMDB;
242 default: llvm_unreachable("Unhandled submode!");
243 case ARM_AM::ia: return ARM::VLDMSIA;
244 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
249 default: llvm_unreachable("Unhandled submode!");
250 case ARM_AM::ia: return ARM::VSTMSIA;
251 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
256 default: llvm_unreachable("Unhandled submode!");
257 case ARM_AM::ia: return ARM::VLDMDIA;
258 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
263 default: llvm_unreachable("Unhandled submode!");
264 case ARM_AM::ia: return ARM::VSTMDIA;
265 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
273 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
275 default: llvm_unreachable("Unhandled opcode!");
282 case ARM::tLDMIA_UPD:
283 case ARM::tSTMIA_UPD:
284 case ARM::t2LDMIA_RET:
286 case ARM::t2LDMIA_UPD:
288 case ARM::t2STMIA_UPD:
290 case ARM::VLDMSIA_UPD:
292 case ARM::VSTMSIA_UPD:
294 case ARM::VLDMDIA_UPD:
296 case ARM::VSTMDIA_UPD:
310 case ARM::t2LDMDB_UPD:
312 case ARM::t2STMDB_UPD:
313 case ARM::VLDMSDB_UPD:
314 case ARM::VSTMSDB_UPD:
315 case ARM::VLDMDDB_UPD:
316 case ARM::VSTMDDB_UPD:
327 } // end namespace ARM_AM
328 } // end namespace llvm
330 static bool isT1i32Load(unsigned Opc) {
331 return Opc == ARM::tLDRi;
334 static bool isT2i32Load(unsigned Opc) {
335 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
338 static bool isi32Load(unsigned Opc) {
339 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
342 static bool isT1i32Store(unsigned Opc) {
343 return Opc == ARM::tSTRi;
346 static bool isT2i32Store(unsigned Opc) {
347 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
350 static bool isi32Store(unsigned Opc) {
351 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
354 static unsigned getImmScale(unsigned Opc) {
356 default: llvm_unreachable("Unhandled opcode!");
369 /// Update future uses of the base register with the offset introduced
370 /// due to writeback. This function only works on Thumb1.
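///
/// Illustrative example (assumed registers and offsets, not from the original
/// comments): after two word loads are merged into "ldmia r0!, {r1, r2}", the
/// writeback advances r0 by 8 bytes, so a later "ldr r3, [r0, #8]" must be
/// rewritten as "ldr r3, [r0]"; if an instruction cannot absorb the
/// adjustment, a "subs r0, #8" is inserted before it instead.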
372 ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
373 MachineBasicBlock::iterator MBBI,
374 DebugLoc dl, unsigned Base,
376 ARMCC::CondCodes Pred, unsigned PredReg) {
377 assert(isThumb1 && "Can only update base register uses for Thumb1!");
378 // Start updating any instructions with immediate offsets. Insert a SUB before
379 // the first non-updateable instruction (if any).
380 for (; MBBI != MBB.end(); ++MBBI) {
381 bool InsertSub = false;
382 unsigned Opc = MBBI->getOpcode();
384 if (MBBI->readsRegister(Base)) {
387 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
389 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
391 if (IsLoad || IsStore) {
392 // Loads and stores with immediate offsets can be updated, but only if
393 // the new offset isn't negative.
394 // The MachineOperand containing the offset immediate is the last one
395 // before predicates.
397 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
398 // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
399 Offset = MO.getImm() - WordOffset * getImmScale(Opc);
401 // If storing the base register, it needs to be reset first.
402 unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
404 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
409 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
410 !definesCPSR(MBBI)) {
411 // SUBS/ADDS using this register, with a dead def of the CPSR.
412 // Merge it with the update; if the merged offset is too large,
413 // insert a new sub instead.
415 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
416 Offset = (Opc == ARM::tSUBi8) ?
417 MO.getImm() + WordOffset * 4 :
418 MO.getImm() - WordOffset * 4 ;
419 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
420 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
423 // The base register has now been reset, so exit early.
430 // Can't update the instruction.
434 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
435 // Since SUBS sets the condition flags, we can't place the base reset
436 // after an instruction that has a live CPSR def.
437 // The base register might also contain an argument for a function call.
442 // An instruction above couldn't be updated, so insert a sub.
443 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
444 .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
445 .addImm(Pred).addReg(PredReg);
449 if (MBBI->killsRegister(Base))
450 // Register got killed. Stop updating.
454 // End of block was reached.
455 if (MBB.succ_size() > 0) {
456 // FIXME: Because of a bug, live registers are sometimes missing from
457 // the successor blocks' live-in sets. This means we can't trust that
458 // information and *always* have to reset at the end of a block.
460 if (MBBI != MBB.end()) --MBBI;
462 BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
463 .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
464 .addImm(Pred).addReg(PredReg);
468 /// MergeOps - Create and insert a LDM or STM with Base as base register and
469 /// registers in Regs as the register operands that would be loaded / stored.
470 /// It returns true if the transformation is done.
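///
/// Illustrative sketch (not part of the original comment): when the starting
/// Offset is non-zero, e.g. word loads at [r2, #8] and [r2, #12], MergeOps
/// first materializes a new base with an ADD/SUB of Base and Offset (reusing
/// a destination register for loads, or the scratch register otherwise) and
/// then emits the LDM/STM from that new base.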
472 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
473 MachineBasicBlock::iterator MBBI,
474 int Offset, unsigned Base, bool BaseKill,
475 int Opcode, ARMCC::CondCodes Pred,
476 unsigned PredReg, unsigned Scratch, DebugLoc dl,
477 ArrayRef<std::pair<unsigned, bool> > Regs,
478 ArrayRef<unsigned> ImpDefs) {
479 // Only a single register to load / store. Don't bother.
480 unsigned NumRegs = Regs.size();
484 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
485 // Compute liveness information for that register to make the decision.
486 bool SafeToClobberCPSR = !isThumb1 ||
487 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
488 MachineBasicBlock::LQR_Dead);
490 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
492 // Exception: If the base register is in the input reglist, Thumb1 LDM is
493 // non-writeback.
494 // It's also not possible to merge an STR of the base register in Thumb1.
496 for (unsigned I = 0; I < NumRegs; ++I)
497 if (Base == Regs[I].first) {
498 if (Opcode == ARM::tLDRi) {
501 } else if (Opcode == ARM::tSTRi) {
506 ARM_AM::AMSubMode Mode = ARM_AM::ia;
507 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
508 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
509 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
511 if (Offset == 4 && haveIBAndDA) {
513 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
515 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
516 // VLDM/VSTM do not support DB mode without also updating the base reg.
518 } else if (Offset != 0) {
519 // Check if this is a supported opcode before inserting instructions to
520 // calculate a new base register.
521 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
523 // If starting offset isn't zero, insert a MI to materialize a new base.
524 // But only do so if it is cost effective, i.e. merging more than two
525 // loads / stores.
529 // On Thumb1, it's not worth materializing a new base register without
530 // clobbering the CPSR (i.e. not using ADDS/SUBS).
531 if (!SafeToClobberCPSR)
535 if (isi32Load(Opcode)) {
536 // If it is a load, then just use one of the destination registers as
537 // the new base.
538 NewBase = Regs[NumRegs-1].first;
540 // Otherwise, use the scratch register as the new base.
547 isThumb2 ? ARM::t2ADDri :
548 (isThumb1 && Offset < 8) ? ARM::tADDi3 :
549 isThumb1 ? ARM::tADDi8 : ARM::ADDri;
554 isThumb2 ? ARM::t2SUBri :
555 (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
556 isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
559 if (!TL->isLegalAddImmediate(Offset))
560 // FIXME: Try add with register operand?
561 return false; // Probably not worth it then.
564 // Thumb1: depending on immediate size, use either
565 //   ADDS NewBase, Base, #imm3
566 // or
567 //   MOV  NewBase, Base
568 //   ADDS NewBase, #imm8.
569 if (Base != NewBase && Offset >= 8) {
570 // Need to insert a MOV to the new base first.
571 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
572 .addReg(Base, getKillRegState(BaseKill))
573 .addImm(Pred).addReg(PredReg);
574 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
578 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
579 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
580 .addImm(Pred).addReg(PredReg);
582 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
583 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
584 .addImm(Pred).addReg(PredReg).addReg(0);
587 BaseKill = true; // New base is always killed straight away.
590 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
591 Opcode == ARM::VLDRD);
593 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
594 // base register writeback.
595 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
596 if (!Opcode) return false;
598 // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
599 // - There is no writeback (LDM of base register),
600 // - the base register is killed by the merged instruction,
601 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
602 // to reset the base register.
603 // Otherwise, don't merge.
604 // It's safe to return here since the code to materialize a new base register
605 // above is also conditional on SafeToClobberCPSR.
606 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
609 MachineInstrBuilder MIB;
612 if (Opcode == ARM::tLDMIA)
613 // Update tLDMIA with writeback if necessary.
614 Opcode = ARM::tLDMIA_UPD;
616 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
618 // Thumb1: we might need to set base writeback when building the MI.
619 MIB.addReg(Base, getDefRegState(true))
620 .addReg(Base, getKillRegState(BaseKill));
622 // The base isn't dead after a merged instruction with writeback.
623 // Insert a sub instruction after the newly formed instruction to reset.
625 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
628 // No writeback, simply build the MachineInstr.
629 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
630 MIB.addReg(Base, getKillRegState(BaseKill));
633 MIB.addImm(Pred).addReg(PredReg);
635 for (unsigned i = 0; i != NumRegs; ++i)
636 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
637 | getKillRegState(Regs[i].second));
639 // Add implicit defs for super-registers.
640 for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
641 MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
646 /// \brief Find all instructions using a given imp-def within a range.
648 /// We are trying to combine a range of instructions, one of which (located at
649 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
650 /// be placed at RangeEnd, and so any uses of this definition between RangeBegin
651 /// and RangeEnd must be modified to use an undefined value.
653 /// The live range continues until we find a second definition or one of the
654 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
655 /// we must consider all uses and decide which are relevant in a second pass.
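///
/// Illustrative scenario (an assumption for exposition, not from the original
/// comment): a "VLDRS S0, ..." in the queue may carry an implicit def of the
/// containing D0 register; if another queued instruction reads D0 between that
/// def and the final VLDM position, the merge moves the def past the read, so
/// the reading operand is collected here and later marked undef.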
656 void ARMLoadStoreOpt::findUsesOfImpDef(
657 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
658 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
659 std::map<unsigned, MachineOperand *> Uses;
660 unsigned LastLivePos = RangeEnd;
662 // First we find all uses of this register with Position between RangeBegin
663 // and RangeEnd, any or all of these could be uses of a definition at
664 // RangeBegin. We also record the latest position a definition at RangeBegin
665 // would be considered live.
666 for (unsigned i = 0; i < MemOps.size(); ++i) {
667 MachineInstr &MI = *MemOps[i].MBBI;
668 unsigned MIPosition = MemOps[i].Position;
669 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
672 // If this instruction defines the register, then any later use will be of
673 // that definition rather than ours.
674 if (MI.definesRegister(DefReg))
675 LastLivePos = std::min(LastLivePos, MIPosition);
677 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
681 // If this instruction kills the register then (assuming liveness is
682 // correct when we start) we don't need to think about anything after here.
684 LastLivePos = std::min(LastLivePos, MIPosition);
686 Uses[MIPosition] = UseOp;
689 // Now we traverse the list of all uses, and append the ones that actually use
690 // our definition to the requested list.
691 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
694 // List is sorted by position so once we've found one out of range there
695 // will be no more to consider.
696 if (I->first > LastLivePos)
698 UsesOfImpDefs.push_back(I->second);
702 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
703 // success.
704 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
706 unsigned memOpsBegin, unsigned memOpsEnd,
707 unsigned insertAfter, int Offset,
708 unsigned Base, bool BaseKill,
710 ARMCC::CondCodes Pred, unsigned PredReg,
713 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
714 // First calculate which of the registers should be killed by the merged
715 // instruction.
716 const unsigned insertPos = memOps[insertAfter].Position;
717 SmallSet<unsigned, 4> KilledRegs;
718 DenseMap<unsigned, unsigned> Killer;
719 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
720 if (i == memOpsBegin) {
725 if (memOps[i].Position < insertPos && memOps[i].isKill) {
726 unsigned Reg = memOps[i].Reg;
727 KilledRegs.insert(Reg);
732 SmallVector<std::pair<unsigned, bool>, 8> Regs;
733 SmallVector<unsigned, 8> ImpDefs;
734 SmallVector<MachineOperand *, 8> UsesOfImpDefs;
735 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
736 unsigned Reg = memOps[i].Reg;
737 // If we are inserting the merged operation after an operation that
738 // uses the same register, make sure to transfer any kill flag.
739 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
740 Regs.push_back(std::make_pair(Reg, isKill));
742 // Collect any implicit defs of super-registers. They must be preserved.
743 for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
744 if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
746 unsigned DefReg = MO->getReg();
747 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
748 ImpDefs.push_back(DefReg);
750 // There may be other uses of the definition between this instruction and
751 // the eventual LDM/STM position. These should be marked undef if the
752 // merge takes place.
753 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
758 // Try to do the merge.
759 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
761 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
762 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
765 // Merge succeeded, update records.
766 Merges.push_back(std::prev(Loc));
768 // In gathering loads together, we may have moved the imp-def of a register
769 // past one of its uses. This is OK, since we know better than the rest of
770 // LLVM what's OK with ARM loads and stores; but we still have to adjust the
772 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
773 E = UsesOfImpDefs.end();
777 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
778 // Remove kill flags from any memops that come before insertPos.
779 if (Regs[i-memOpsBegin].second) {
780 unsigned Reg = Regs[i-memOpsBegin].first;
781 if (KilledRegs.count(Reg)) {
782 unsigned j = Killer[Reg];
783 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
784 assert(Idx >= 0 && "Cannot find killing operand");
785 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
786 memOps[j].isKill = false;
788 memOps[i].isKill = true;
790 MBB.erase(memOps[i].MBBI);
791 // Update this memop to refer to the merged instruction.
792 // We may need to move kill flags again.
793 memOps[i].Merged = true;
794 memOps[i].MBBI = Merges.back();
795 memOps[i].Position = insertPos;
798 // Update memOps offsets, since they may have been modified by MergeOps.
799 for (auto &MemOp : memOps) {
800 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
804 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
805 /// load / store multiple instructions.
807 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
808 unsigned Base, int Opcode, unsigned Size,
809 ARMCC::CondCodes Pred, unsigned PredReg,
810 unsigned Scratch, MemOpQueue &MemOps,
811 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
812 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
813 int Offset = MemOps[SIndex].Offset;
814 int SOffset = Offset;
815 unsigned insertAfter = SIndex;
816 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
817 DebugLoc dl = Loc->getDebugLoc();
818 const MachineOperand &PMO = Loc->getOperand(0);
819 unsigned PReg = PMO.getReg();
820 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
822 unsigned Limit = ~0U;
823 bool BaseKill = false;
824 // vldm / vstm limits are 32 for S variants, 16 for D variants.
842 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
843 int NewOffset = MemOps[i].Offset;
844 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
845 unsigned Reg = MO.getReg();
846 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
847 // Register numbers must be in ascending order. For VFP / NEON load and
848 // store multiples, the registers must also be consecutive and within the
849 // limit on the number of registers per instruction.
850 if (Reg != ARM::SP &&
851 NewOffset == Offset + (int)Size &&
852 ((isNotVFP && RegNum > PRegNum) ||
853 ((Count < Limit) && RegNum == PRegNum+1)) &&
854 // On Swift we don't want vldm/vstm to start with an odd register number
855 // because unaligned Q-register vldm/vstm need more uops.
856 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
861 // Can't merge this in. Try merging the earlier ones first.
862 // We need to compute BaseKill here because the MemOps may have been
863 // reordered.
864 BaseKill = Loc->killsRegister(Base);
866 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
867 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
868 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
873 if (MemOps[i].Position > MemOps[insertAfter].Position) {
875 Loc = MemOps[i].MBBI;
879 BaseKill = Loc->killsRegister(Base);
880 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
881 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
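// Illustrative example (not from the original source; registers are
// assumptions): for Base = r0 and Bytes = 8, an instruction such as
//   sub r0, r0, #8        (or "subs r0, #8" in Thumb)
// with the same predicate qualifies as a matching decrement, provided it does
// not leave a live CPSR def where the flags must be preserved.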
884 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
885 unsigned Bytes, unsigned Limit,
886 ARMCC::CondCodes Pred, unsigned PredReg) {
887 unsigned MyPredReg = 0;
891 bool CheckCPSRDef = false;
892 switch (MI->getOpcode()) {
893 default: return false;
903 // Make sure the offset fits in 8 bits.
904 if (Bytes == 0 || (Limit && Bytes >= Limit))
907 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
908 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
909 if (!(MI->getOperand(0).getReg() == Base &&
910 MI->getOperand(1).getReg() == Base &&
911 (MI->getOperand(2).getImm() * Scale) == Bytes &&
912 getInstrPredicate(MI, MyPredReg) == Pred &&
913 MyPredReg == PredReg))
916 return CheckCPSRDef ? !definesCPSR(MI) : true;
919 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
920 unsigned Bytes, unsigned Limit,
921 ARMCC::CondCodes Pred, unsigned PredReg) {
922 unsigned MyPredReg = 0;
926 bool CheckCPSRDef = false;
927 switch (MI->getOpcode()) {
928 default: return false;
938 if (Bytes == 0 || (Limit && Bytes >= Limit))
939 // Make sure the offset fits in 8 bits.
942 unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
943 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
944 if (!(MI->getOperand(0).getReg() == Base &&
945 MI->getOperand(1).getReg() == Base &&
946 (MI->getOperand(2).getImm() * Scale) == Bytes &&
947 getInstrPredicate(MI, MyPredReg) == Pred &&
948 MyPredReg == PredReg))
951 return CheckCPSRDef ? !definesCPSR(MI) : true;
954 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
955 switch (MI->getOpcode()) {
980 case ARM::tLDMIA_UPD:
981 case ARM::tSTMIA_UPD:
988 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
991 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
995 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
996 ARM_AM::AMSubMode Mode) {
998 default: llvm_unreachable("Unhandled opcode!");
1004 default: llvm_unreachable("Unhandled submode!");
1005 case ARM_AM::ia: return ARM::LDMIA_UPD;
1006 case ARM_AM::ib: return ARM::LDMIB_UPD;
1007 case ARM_AM::da: return ARM::LDMDA_UPD;
1008 case ARM_AM::db: return ARM::LDMDB_UPD;
1015 default: llvm_unreachable("Unhandled submode!");
1016 case ARM_AM::ia: return ARM::STMIA_UPD;
1017 case ARM_AM::ib: return ARM::STMIB_UPD;
1018 case ARM_AM::da: return ARM::STMDA_UPD;
1019 case ARM_AM::db: return ARM::STMDB_UPD;
1024 default: llvm_unreachable("Unhandled submode!");
1025 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
1026 case ARM_AM::db: return ARM::t2LDMDB_UPD;
1031 default: llvm_unreachable("Unhandled submode!");
1032 case ARM_AM::ia: return ARM::t2STMIA_UPD;
1033 case ARM_AM::db: return ARM::t2STMDB_UPD;
1037 default: llvm_unreachable("Unhandled submode!");
1038 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
1039 case ARM_AM::db: return ARM::VLDMSDB_UPD;
1043 default: llvm_unreachable("Unhandled submode!");
1044 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
1045 case ARM_AM::db: return ARM::VLDMDDB_UPD;
1049 default: llvm_unreachable("Unhandled submode!");
1050 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
1051 case ARM_AM::db: return ARM::VSTMSDB_UPD;
1055 default: llvm_unreachable("Unhandled submode!");
1056 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
1057 case ARM_AM::db: return ARM::VSTMDDB_UPD;
1062 /// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
1063 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
1064 ///
1065 /// stmia rn, <ra, rb, rc>
1066 /// rn := rn + 4 * 3;
1067 /// =>
1068 /// stmia rn!, <ra, rb, rc>
1069 ///
1070 /// rn := rn - 4 * 3;
1071 /// ldmia rn, <ra, rb, rc>
1072 /// =>
1073 /// ldmdb rn!, <ra, rb, rc>
1074 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
1075 MachineBasicBlock::iterator MBBI,
1077 MachineBasicBlock::iterator &I) {
1078 // Thumb1 is already using updating loads/stores.
1079 if (isThumb1) return false;
1081 MachineInstr *MI = MBBI;
1082 unsigned Base = MI->getOperand(0).getReg();
1083 bool BaseKill = MI->getOperand(0).isKill();
1084 unsigned Bytes = getLSMultipleTransferSize(MI);
1085 unsigned PredReg = 0;
1086 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1087 int Opcode = MI->getOpcode();
1088 DebugLoc dl = MI->getDebugLoc();
1090 // Can't use an updating ld/st if the base register is also a dest
1091 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
1092 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
1093 if (MI->getOperand(i).getReg() == Base)
1096 bool DoMerge = false;
1097 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
1099 // Try merging with the previous instruction.
1100 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1101 if (MBBI != BeginMBBI) {
1102 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1103 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1105 if (Mode == ARM_AM::ia &&
1106 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1109 } else if (Mode == ARM_AM::ib &&
1110 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
1115 MBB.erase(PrevMBBI);
1118 // Try merging with the next instruction.
1119 MachineBasicBlock::iterator EndMBBI = MBB.end();
1120 if (!DoMerge && MBBI != EndMBBI) {
1121 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1122 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1124 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
1125 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1127 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
1128 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
1132 if (NextMBBI == I) {
1136 MBB.erase(NextMBBI);
1143 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
1144 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1145 .addReg(Base, getDefRegState(true)) // WB base register
1146 .addReg(Base, getKillRegState(BaseKill))
1147 .addImm(Pred).addReg(PredReg);
1149 // Transfer the rest of operands.
1150 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
1151 MIB.addOperand(MI->getOperand(OpNum));
1153 // Transfer memoperands.
1154 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1160 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1161 ARM_AM::AddrOpc Mode) {
1164 return ARM::LDR_PRE_IMM;
1166 return ARM::STR_PRE_IMM;
1168 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1170 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1172 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1174 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1177 return ARM::t2LDR_PRE;
1180 return ARM::t2STR_PRE;
1181 default: llvm_unreachable("Unhandled opcode!");
1185 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1186 ARM_AM::AddrOpc Mode) {
1189 return ARM::LDR_POST_IMM;
1191 return ARM::STR_POST_IMM;
1193 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1195 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1197 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1199 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1202 return ARM::t2LDR_POST;
1205 return ARM::t2STR_POST;
1206 default: llvm_unreachable("Unhandled opcode!");
1210 /// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
1211 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
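///
/// For example (an illustrative sketch, not from the original comment; the
/// registers are assumptions):
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4
/// i.e. an increment/decrement of exactly the access size is folded into a
/// post-indexed (or pre-indexed) form of the load / store.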
1212 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
1213 MachineBasicBlock::iterator MBBI,
1214 const TargetInstrInfo *TII,
1216 MachineBasicBlock::iterator &I) {
1217 // Thumb1 doesn't have updating LDR/STR.
1218 // FIXME: Use LDM/STM with single register instead.
1219 if (isThumb1) return false;
1221 MachineInstr *MI = MBBI;
1222 unsigned Base = MI->getOperand(1).getReg();
1223 bool BaseKill = MI->getOperand(1).isKill();
1224 unsigned Bytes = getLSMultipleTransferSize(MI);
1225 int Opcode = MI->getOpcode();
1226 DebugLoc dl = MI->getDebugLoc();
1227 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
1228 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
1229 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
1230 if (isi32Load(Opcode) || isi32Store(Opcode))
1231 if (MI->getOperand(2).getImm() != 0)
1233 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
1236 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
1237 // Can't do the merge if the destination register is the same as the would-be
1238 // writeback register.
1239 if (MI->getOperand(0).getReg() == Base)
1242 unsigned PredReg = 0;
1243 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1244 bool DoMerge = false;
1245 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1246 unsigned NewOpc = 0;
1247 // AM2 - 12 bits, thumb2 - 8 bits.
1248 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
1250 // Try merging with the previous instruction.
1251 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1252 if (MBBI != BeginMBBI) {
1253 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1254 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
1256 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1258 AddSub = ARM_AM::sub;
1259 } else if (!isAM5 &&
1260 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1264 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
1265 MBB.erase(PrevMBBI);
1269 // Try merging with the next instruction.
1270 MachineBasicBlock::iterator EndMBBI = MBB.end();
1271 if (!DoMerge && MBBI != EndMBBI) {
1272 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1273 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1276 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
1278 AddSub = ARM_AM::sub;
1279 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
1283 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
1284 if (NextMBBI == I) {
1288 MBB.erase(NextMBBI);
1296 // VLDM[SD]_UPD, VSTM[SD]_UPD
1297 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1298 // updating load/store-multiple instructions can be used with only one
1299 // register.)
1300 MachineOperand &MO = MI->getOperand(0);
1301 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1302 .addReg(Base, getDefRegState(true)) // WB base register
1303 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1304 .addImm(Pred).addReg(PredReg)
1305 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1306 getKillRegState(MO.isKill())));
1309 // LDR_PRE, LDR_POST
1310 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1311 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1312 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1313 .addReg(Base, RegState::Define)
1314 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1316 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1317 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1318 .addReg(Base, RegState::Define)
1319 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1322 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1323 // t2LDR_PRE, t2LDR_POST
1324 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1325 .addReg(Base, RegState::Define)
1326 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1329 MachineOperand &MO = MI->getOperand(0);
1330 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1331 // the vestigial zero-reg offset register. When that's fixed, this clause
1332 // can be removed entirely.
1333 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1334 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1335 // STR_PRE, STR_POST
1336 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1337 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1338 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1340 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1341 // t2STR_PRE, t2STR_POST
1342 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1343 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1344 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1352 /// isMemoryOp - Returns true if instruction is a memory operation that this
1353 /// pass is capable of operating on.
1354 static bool isMemoryOp(const MachineInstr *MI) {
1355 // When no memory operands are present, conservatively assume unaligned,
1356 // volatile, unfoldable.
1357 if (!MI->hasOneMemOperand())
1360 const MachineMemOperand *MMO = *MI->memoperands_begin();
1362 // Don't touch volatile memory accesses - we may be changing their order.
1363 if (MMO->isVolatile())
1366 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1367 // not.
1368 if (MMO->getAlignment() < 4)
1371 // str <undef> could probably be eliminated entirely, but for now we just want
1372 // to avoid making a mess of it.
1373 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1374 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1375 MI->getOperand(0).isUndef())
1378 // Likewise don't mess with references to undefined addresses.
1379 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1380 MI->getOperand(1).isUndef())
1383 int Opcode = MI->getOpcode();
1388 return MI->getOperand(1).isReg();
1391 return MI->getOperand(1).isReg();
1400 return MI->getOperand(1).isReg();
1405 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1406 /// op that is being merged.
1407 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1408 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1409 unsigned Position = MemOps[0].Position;
1410 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1411 if (MemOps[i].Position < Position) {
1412 Position = MemOps[i].Position;
1413 Loc = MemOps[i].MBBI;
1417 if (Loc != MBB.begin())
1418 RS->forward(std::prev(Loc));
1421 static void InsertLDR_STR(MachineBasicBlock &MBB,
1422 MachineBasicBlock::iterator &MBBI,
1423 int Offset, bool isDef,
1424 DebugLoc dl, unsigned NewOpc,
1425 unsigned Reg, bool RegDeadKill, bool RegUndef,
1426 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1427 bool OffKill, bool OffUndef,
1428 ARMCC::CondCodes Pred, unsigned PredReg,
1429 const TargetInstrInfo *TII, bool isT2) {
1431 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1433 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1434 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1435 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1437 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1439 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1440 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1441 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1445 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1446 MachineBasicBlock::iterator &MBBI) {
1447 MachineInstr *MI = &*MBBI;
1448 unsigned Opcode = MI->getOpcode();
1449 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1450 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1451 const MachineOperand &BaseOp = MI->getOperand(2);
1452 unsigned BaseReg = BaseOp.getReg();
1453 unsigned EvenReg = MI->getOperand(0).getReg();
1454 unsigned OddReg = MI->getOperand(1).getReg();
1455 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1456 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1457 // ARM errata 602117: LDRD with base in list may result in incorrect base
1458 // register when interrupted or faulted.
1459 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1460 if (!Errata602117 &&
1461 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1464 MachineBasicBlock::iterator NewBBI = MBBI;
1465 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1466 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1467 bool EvenDeadKill = isLd ?
1468 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1469 bool EvenUndef = MI->getOperand(0).isUndef();
1470 bool OddDeadKill = isLd ?
1471 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1472 bool OddUndef = MI->getOperand(1).isUndef();
1473 bool BaseKill = BaseOp.isKill();
1474 bool BaseUndef = BaseOp.isUndef();
1475 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1476 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1477 int OffImm = getMemoryOpOffset(MI);
1478 unsigned PredReg = 0;
1479 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1481 if (OddRegNum > EvenRegNum && OffImm == 0) {
1482 // Ascending register numbers and no offset. It's safe to change it to an
1483 // ldm / stm.
1484 unsigned NewOpc = (isLd)
1485 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1486 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1488 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1489 .addReg(BaseReg, getKillRegState(BaseKill))
1490 .addImm(Pred).addReg(PredReg)
1491 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1492 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1495 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1496 .addReg(BaseReg, getKillRegState(BaseKill))
1497 .addImm(Pred).addReg(PredReg)
1499 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1501 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1504 NewBBI = std::prev(MBBI);
1506 // Split into two instructions.
1507 unsigned NewOpc = (isLd)
1508 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1509 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1510 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1511 // so adjust and use t2LDRi12 here for that.
1512 unsigned NewOpc2 = (isLd)
1513 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1514 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1515 DebugLoc dl = MBBI->getDebugLoc();
1516 // If this is a load and the base register is killed, it may have been
1517 // re-defed by the load; make sure the first load does not clobber it.
1519 (BaseKill || OffKill) &&
1520 (TRI->regsOverlap(EvenReg, BaseReg))) {
1521 assert(!TRI->regsOverlap(OddReg, BaseReg));
1522 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1523 OddReg, OddDeadKill, false,
1524 BaseReg, false, BaseUndef, false, OffUndef,
1525 Pred, PredReg, TII, isT2);
1526 NewBBI = std::prev(MBBI);
1527 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1528 EvenReg, EvenDeadKill, false,
1529 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1530 Pred, PredReg, TII, isT2);
1532 if (OddReg == EvenReg && EvenDeadKill) {
1533 // If the two source operands are the same, the kill marker is
1534 // probably on the first one. e.g.
1535 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1536 EvenDeadKill = false;
1539 // Never kill the base register in the first instruction.
1540 if (EvenReg == BaseReg)
1541 EvenDeadKill = false;
1542 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1543 EvenReg, EvenDeadKill, EvenUndef,
1544 BaseReg, false, BaseUndef, false, OffUndef,
1545 Pred, PredReg, TII, isT2);
1546 NewBBI = std::prev(MBBI);
1547 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1548 OddReg, OddDeadKill, OddUndef,
1549 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1550 Pred, PredReg, TII, isT2);
1565 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1566 /// ops of the same base and incrementing offset into LDM / STM ops.
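///
/// Illustrative sketch (assumed register choices, not from the original
/// comment): the scan collects loads / stores that share an opcode, base
/// register and predicate, kept ordered by offset, e.g.
///   ldr r1, [r0, #4]
///   ldr r2, [r0, #8]
///   ldr r3, [r0, #12]
/// which MergeLDR_STR can then turn into "ldmib r0, {r1, r2, r3}" in ARM mode
/// (or an IA form after materializing a new base on Thumb).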
1567 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1568 unsigned NumMerges = 0;
1569 unsigned NumMemOps = 0;
1571 unsigned CurrBase = 0;
1573 unsigned CurrSize = 0;
1574 ARMCC::CondCodes CurrPred = ARMCC::AL;
1575 unsigned CurrPredReg = 0;
1576 unsigned Position = 0;
1577 SmallVector<MachineBasicBlock::iterator,4> Merges;
1579 RS->enterBasicBlock(&MBB);
1580 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1582 if (FixInvalidRegPairOp(MBB, MBBI))
1585 bool Advance = false;
1586 bool TryMerge = false;
1587 bool Clobber = false;
1589 bool isMemOp = isMemoryOp(MBBI);
1591 int Opcode = MBBI->getOpcode();
1592 unsigned Size = getLSMultipleTransferSize(MBBI);
1593 const MachineOperand &MO = MBBI->getOperand(0);
1594 unsigned Reg = MO.getReg();
1595 bool isKill = MO.isDef() ? false : MO.isKill();
1596 unsigned Base = MBBI->getOperand(1).getReg();
1597 unsigned PredReg = 0;
1598 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1599 int Offset = getMemoryOpOffset(MBBI);
1602 // r5 := ldr [r5, #4]
1603 // r6 := ldr [r5, #8]
1605 // The second ldr has effectively broken the chain even though it
1606 // looks like the later ldr(s) use the same base register. Try to
1607 // merge the ldr's so far, including this one. But don't try to
1608 // combine the following ldr(s).
1609 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1612 // r4 := ldr [r0, #8]
1613 // r4 := ldr [r0, #4]
1615 // The optimization may reorder the second ldr in front of the first
1616 // ldr, which violates the write-after-write (WAW) dependence. The same
1617 // applies to str. Try to merge the inst(s) already in MemOps.
1618 bool Overlap = false;
1619 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1620 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1626 if (CurrBase == 0 && !Clobber) {
1627 // Start of a new chain.
1632 CurrPredReg = PredReg;
1633 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1636 } else if (!Overlap) {
1642 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1643 // No need to match PredReg.
1644 // Continue adding to the queue.
1645 if (Offset > MemOps.back().Offset) {
1646 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1651 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1653 if (Offset < I->Offset) {
1654 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1659 } else if (Offset == I->Offset) {
1660 // Collision! This can't be merged!
1669 if (MBBI->isDebugValue()) {
1672 // Reach the end of the block, try merging the memory instructions.
1674 } else if (Advance) {
1678 // Reach the end of the block, try merging the memory instructions.
1685 if (NumMemOps > 1) {
1686 // Try to find a free register to use as a new base in case it's needed.
1687 // First advance to the instruction just before the start of the chain.
1688 AdvanceRS(MBB, MemOps);
1690 // Find a scratch register.
1692 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
1694 // Process the load / store instructions.
1695 RS->forward(std::prev(MBBI));
1699 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1700 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1702 // Try folding preceding/trailing base inc/dec into the generated
1703 // LDM/STM ops.
1704 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1705 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1707 NumMerges += Merges.size();
1709 // Try folding preceding/trailing base inc/dec into those load/store
1710 // that were not merged to form LDM/STM ops.
1711 for (unsigned i = 0; i != NumMemOps; ++i)
1712 if (!MemOps[i].Merged)
1713 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1716 // RS may be pointing to an instruction that's deleted.
1717 RS->skipTo(std::prev(MBBI));
1718 } else if (NumMemOps == 1) {
1719 // Try folding preceding/trailing base inc/dec into the single
1720 // load / store.
1721 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1723 RS->forward(std::prev(MBBI));
1730 CurrPred = ARMCC::AL;
1737 // If iterator hasn't been advanced and this is not a memory op, skip it.
1738 // It can't start a new chain anyway.
1739 if (!Advance && !isMemOp && MBBI != E) {
1745 return NumMerges > 0;
1748 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
1749 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1750 /// directly restores the value of LR into pc.
1751 ///   ldmfd sp!, {..., lr}
1752 ///   bx lr
1753 /// or
1754 ///   ldmfd sp!, {..., lr}
1755 ///   mov pc, lr
1756 /// =>
1757 ///   ldmfd sp!, {..., pc}
1758 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1759 // Thumb1 LDM doesn't allow high registers.
1760 if (isThumb1) return false;
1761 if (MBB.empty()) return false;
1763 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1764 if (MBBI != MBB.begin() &&
1765 (MBBI->getOpcode() == ARM::BX_RET ||
1766 MBBI->getOpcode() == ARM::tBX_RET ||
1767 MBBI->getOpcode() == ARM::MOVPCLR)) {
1768 MachineInstr *PrevMI = std::prev(MBBI);
1769 unsigned Opcode = PrevMI->getOpcode();
1770 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1771 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1772 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1773 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1774 if (MO.getReg() != ARM::LR)
1776 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1777 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1778 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1779 PrevMI->setDesc(TII->get(NewOpc));
1781 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
1789 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1790 const TargetMachine &TM = Fn.getTarget();
1791 TL = TM.getSubtargetImpl()->getTargetLowering();
1792 AFI = Fn.getInfo<ARMFunctionInfo>();
1793 TII = TM.getSubtargetImpl()->getInstrInfo();
1794 TRI = TM.getSubtargetImpl()->getRegisterInfo();
1795 STI = &TM.getSubtarget<ARMSubtarget>();
1796 RS = new RegScavenger();
1797 isThumb2 = AFI->isThumb2Function();
1798 isThumb1 = AFI->isThumbFunction() && !isThumb2;
1800 bool Modified = false;
1801 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1803 MachineBasicBlock &MBB = *MFI;
1804 Modified |= LoadStoreMultipleOpti(MBB);
1805 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1806 Modified |= MergeReturnIntoLDM(MBB);
1814 /// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
1815 /// loads / stores from consecutive locations closer together to make it
1816 /// more likely they will be combined later.
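///
/// For example (an illustrative sketch, not from the original comments; the
/// virtual register names are assumptions): with unrelated instructions in
/// between,
///   ldr %vreg1, [%vreg0, #4]
///   mul ...
///   ldr %vreg2, [%vreg0, #8]
/// the two loads are rescheduled next to each other (and, when profitable,
/// rewritten as an LDRD) so the post-RA pass has a chance to form LDM/STM.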
1819 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
1820 static char ID;
1821 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1823 const DataLayout *TD;
1824 const TargetInstrInfo *TII;
1825 const TargetRegisterInfo *TRI;
1826 const ARMSubtarget *STI;
1827 MachineRegisterInfo *MRI;
1828 MachineFunction *MF;
1830 bool runOnMachineFunction(MachineFunction &Fn) override;
1832 const char *getPassName() const override {
1833 return "ARM pre- register allocation load / store optimization pass";
1837 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1838 unsigned &NewOpc, unsigned &EvenReg,
1839 unsigned &OddReg, unsigned &BaseReg,
1841 unsigned &PredReg, ARMCC::CondCodes &Pred,
1843 bool RescheduleOps(MachineBasicBlock *MBB,
1844 SmallVectorImpl<MachineInstr *> &Ops,
1845 unsigned Base, bool isLd,
1846 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1847 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1849 char ARMPreAllocLoadStoreOpt::ID = 0;
1852 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1853 TD = Fn.getSubtarget().getDataLayout();
1854 TII = Fn.getSubtarget().getInstrInfo();
1855 TRI = Fn.getSubtarget().getRegisterInfo();
1856 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1857 MRI = &Fn.getRegInfo();
1860 bool Modified = false;
1861 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1863 Modified |= RescheduleLoadStoreInstrs(MFI);
1868 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1869 MachineBasicBlock::iterator I,
1870 MachineBasicBlock::iterator E,
1871 SmallPtrSetImpl<MachineInstr*> &MemOps,
1872 SmallSet<unsigned, 4> &MemRegs,
1873 const TargetRegisterInfo *TRI) {
1874 // Are there stores / loads / calls between them?
1875 // FIXME: This is overly conservative. We should make use of alias information
1876 // if possible.
1877 SmallSet<unsigned, 4> AddedRegPressure;
1879 if (I->isDebugValue() || MemOps.count(&*I))
1881 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1883 if (isLd && I->mayStore())
1888 // It's not safe to move the first 'str' down.
1891 // str r4, [r0, #+4]
1895 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1896 MachineOperand &MO = I->getOperand(j);
1899 unsigned Reg = MO.getReg();
1900 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1902 if (Reg != Base && !MemRegs.count(Reg))
1903 AddedRegPressure.insert(Reg);
1907 // Estimate register pressure increase due to the transformation.
1908 if (MemRegs.size() <= 4)
1909 // OK if we are moving a small number of instructions.
1911 return AddedRegPressure.size() <= MemRegs.size() * 2;
1915 /// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
1916 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1917 MachineInstr *Op1) {
1918 assert(MI->memoperands_empty() && "expected a new machineinstr");
1919 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1920 + (Op1->memoperands_end() - Op1->memoperands_begin());
1922 MachineFunction *MF = MI->getParent()->getParent();
1923 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1924 MachineSDNode::mmo_iterator MemEnd =
1925 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1927 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1928 MI->setMemRefs(MemBegin, MemEnd);
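// Illustrative example of a pair CanFormLdStDWord may accept (register names
// are assumptions for exposition):
//   ldr r0, [r2, #8]
//   ldr r1, [r2, #12]
// can become "ldrd r0, r1, [r2, #8]" provided the target allows LDRD/STRD,
// the access is suitably aligned for a 64-bit load, the offset fits the
// addressing-mode range, and the two destination registers differ.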
1932 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1934 unsigned &NewOpc, unsigned &EvenReg,
1935 unsigned &OddReg, unsigned &BaseReg,
1936 int &Offset, unsigned &PredReg,
1937 ARMCC::CondCodes &Pred,
1939 // Make sure we're allowed to generate LDRD/STRD.
1940 if (!STI->hasV5TEOps())
1943 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1945 unsigned Opcode = Op0->getOpcode();
1946 if (Opcode == ARM::LDRi12) {
1948 } else if (Opcode == ARM::STRi12) {
1950 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1951 NewOpc = ARM::t2LDRDi8;
1954 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1955 NewOpc = ARM::t2STRDi8;
1962 // Make sure the base address satisfies i64 ld / st alignment requirement.
1963 // At the moment, we ignore the memoryoperand's value.
1964 // If we want to use AliasAnalysis, we should check it accordingly.
1965 if (!Op0->hasOneMemOperand() ||
1966 (*Op0->memoperands_begin())->isVolatile())
1969 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1970 const Function *Func = MF->getFunction();
1971 unsigned ReqAlign = STI->hasV6Ops()
1972 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1973 : 8; // Pre-v6 needs 8-byte alignment.
1974 if (Align < ReqAlign)
1977 // Then make sure the immediate offset fits.
1978 int OffImm = getMemoryOpOffset(Op0);
1980 int Limit = (1 << 8) * Scale;
1981 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1985 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1987 AddSub = ARM_AM::sub;
1990 int Limit = (1 << 8) * Scale;
1991 if (OffImm >= Limit || (OffImm & (Scale-1)))
1993 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1995 EvenReg = Op0->getOperand(0).getReg();
1996 OddReg = Op1->getOperand(0).getReg();
1997 if (EvenReg == OddReg)
1999 BaseReg = Op0->getOperand(1).getReg();
2000 Pred = getInstrPredicate(Op0, PredReg);
2001 dl = Op0->getDebugLoc();
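
// For illustration only (hypothetical registers): a pair of neighbouring
// loads such as
//   ldr r0, [r2]
//   ldr r1, [r2, #4]
// may be rewritten by RescheduleOps below as
//   ldrd r0, r1, [r2]
// provided the base address is sufficiently aligned and r0 / r1 can be
// constrained (or hinted) to form an even / odd register pair.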
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
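
/// RescheduleLoadStoreInstrs - Walk a basic block, bucket the unpredicated
/// loads and stores by base register, and invoke RescheduleOps on each bucket
/// so that accesses off the same base become adjacent and consecutive.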
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;
  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }
  return RetVal;
}

/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
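
// Typical use: the ARM pass configuration (see ARMTargetMachine.cpp) is
// expected to add the pre-allocation variant, created with PreAlloc == true,
// before register allocation, and the post-allocation variant, with PreAlloc
// left false, after register allocation.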