//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &memOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
} // end anonymous namespace
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }
  return false;
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
    : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}
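// Illustrative example (editorial, not in the original source): a tLDRi with
// an encoded immediate field of 3 yields an offset of 12 bytes here, since
// Thumb1 word loads/stores scale the immediate by 4.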
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;
  }
}

} // end namespace ARM_AM
} // end namespace llvm
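// Editorial note: the submode suffixes above follow the ARM load/store
// multiple naming scheme - IA = increment after, IB = increment before,
// DA = decrement after, DB = decrement before - which describes how the base
// register relates to the transferred range.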
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
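/// For example (an illustrative sketch, not part of the original comment):
/// after a merged Thumb1 LDM/STM writes back Base past two words, a later
/// "tLDRi rX, [Base, #8]" can be rewritten as "tLDRi rX, [Base, #0]"; if the
/// adjusted offset would become negative, a SUBS resetting Base is inserted
/// instead.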
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = MBBI->getOperand(0).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
        .addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
      .addImm(Pred).addReg(PredReg);
  }
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
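/// For example (illustrative): with Base = r4, Offset = 0 and
/// Regs = {(r0, kill), (r1, kill)} for an i32 load opcode, this emits
/// "ldmia r4, {r0, r1}".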
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1)
    for (unsigned I = 0; I < NumRegs; ++I)
      if (Base == Regs[I].first) {
        if (Opcode == ARM::tLDRi) {
          Writeback = false;
          break;
        } else if (Opcode == ARM::tSTRi) {
          return false;
        }
      }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8  : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8  : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase && Offset >= 8) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return false;
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(BaseKill));
        } else
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(BaseKill))
            .addImm(Pred).addReg(PredReg);

        // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
        Base = NewBase;
        BaseKill = false;
      }
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg);
    } else
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);

    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return false;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}
/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually use
  // our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // kill flags.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }

  // Update memOps offsets, since they may have been modified by MergeOps.
  for (auto &MemOp : memOps) {
    MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
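/// For example (illustrative): for the queue {ldr r0, [r4]; ldr r2, [r4, #4];
/// ldr r1, [r4, #8]} the third entry breaks the ascending register order, so
/// only the first two loads are merged and the routine recurses on the rest.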
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;
  bool BaseKill = false;
  // vldm / vstm limit are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default:
    break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VLDRD:
  case ARM::VSTRD:
    Limit = 16;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register num
        // because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // modified by a previous merge.
      BaseKill = Loc->killsRegister(Base);
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
                     BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position) {
      insertAfter = i;
      Loc = MemOps[i].MBBI;
    }
  }

  BaseKill = Loc->killsRegister(Base);
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
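// Illustrative example (editorial): "subs r0, r0, #8" matches a decrement of
// Base = r0 by Bytes = 8, so a following "ldmia r0, {...}" of that size can
// be rewritten as "ldmdb r0!, {...}".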
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
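///
/// For example (an illustrative sketch, not from the original comment):
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4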
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write-after-write (WAW) dependence. The same
      // applies to str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load / store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
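///
/// For example (illustrative): if "ldr r1, [r4]" and "ldr r2, [r4, #4]" are
/// separated by unrelated instructions, moving them next to each other makes
/// it more likely a later pass can combine them into an ldrd or ldm.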
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
} // end anonymous namespace
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
1940 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1942 unsigned &NewOpc, unsigned &EvenReg,
1943 unsigned &OddReg, unsigned &BaseReg,
1944 int &Offset, unsigned &PredReg,
1945 ARMCC::CondCodes &Pred,
1947 // Make sure we're allowed to generate LDRD/STRD.
1948 if (!STI->hasV5TEOps())
1951 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1953 unsigned Opcode = Op0->getOpcode();
1954 if (Opcode == ARM::LDRi12) {
1956 } else if (Opcode == ARM::STRi12) {
1958 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1959 NewOpc = ARM::t2LDRDi8;
1962 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1963 NewOpc = ARM::t2STRDi8;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;
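  // For example, the ABI alignment of i64 is typically 8 bytes under AAPCS
  // but 4 under the older APCS, so a v6+ APCS target may form LDRD from a
  // merely word-aligned pair, while pre-v6 cores always require 8 bytes here.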

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
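  // Illustrative: in ARM mode Scale is 1, so representable offsets are
  // -255..255; an OffImm of -48 is encoded as ARM_AM::getAM3Opc(ARM_AM::sub, 48).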

  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });
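  // Descending order means Ops.back() is always the entry with the lowest
  // offset; the scan below consumes candidate runs from the back of the
  // vector, i.e. in ascending offset order.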

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
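  // A "gap" example (hypothetical): ldr r1, [r0] followed by ldr r2, [r0, #8]
  // leaves 4 unaccessed bytes between the two words, so they cannot form a
  // single ldm and the run is cut at that point.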
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
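      // Illustrative numbers for the distance check above: a run of 4 memops
      // (NumMove == 4) is moved only when its first and last members are at
      // most 16 (4 * 4) Loc slots apart.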
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;
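        // Loads are hoisted to the first load's position and stores are sunk
        // to the last store's; the insertion point is then advanced past the
        // run's own instructions and any debug values.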

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
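          // In ARM mode, LDRD / STRD need an even/odd consecutive register
          // pair (e.g. r0/r1), so these hints steer the register allocator
          // toward an assignment that keeps the pair encodable.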
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;
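      // Loc is a simple per-instruction ordinal; RescheduleOps later uses the
      // difference between two Loc values to bound how far apart the memops
      // in a candidate run may be.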

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
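        // Hypothetical example: a second ldr r3, [r0, #8] after an earlier
        // ldr r1, [r0, #8]. Stop here, reschedule what has been collected so
        // far, and start a fresh window after this point.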
        ++MBBI;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
2259 // Re-schedule stores.
2260 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
2261 unsigned Base = StBases[i];
2262 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
2264 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}