//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMISelLowering.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMSubtarget.h"
21 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "ThumbRegisterInfo.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/CodeGen/MachineBasicBlock.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstr.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/RegisterScavenging.h"
35 #include "llvm/CodeGen/SelectionDAGNodes.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Target/TargetInstrInfo.h"
42 #include "llvm/Target/TargetMachine.h"
43 #include "llvm/Target/TargetRegisterInfo.h"
#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
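///
/// An illustrative example (not taken from a specific test case):
///
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
/// =>
///   ldm r0, {r1, r2}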
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }
  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
      : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
} // end anonymous namespace
static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }
  return false;
}
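
// An illustrative example of the decoding done by getMemoryOpOffset (below),
// not taken from the original source: Thumb1 tLDRi/tSTRi immediates count
// words, so an immediate of 2 decodes to a byte offset of 8; AM3 (LDRD/STRD)
// and AM5 (VLDR/VSTR) offsets carry a separate add/sub flag, so the decoded
// offset is negated when that flag is sub.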
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
namespace llvm {
namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET: case ARM::LDMIA: case ARM::LDMIA_UPD:
  case ARM::STMIA:     case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA: case ARM::LDMDA_UPD: case ARM::STMDA: case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB: case ARM::LDMDB_UPD: case ARM::STMDB: case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB: case ARM::LDMIB_UPD: case ARM::STMIB: case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

} // end namespace ARM_AM
} // end namespace llvm
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:   case ARM::tSTRi:
  case ARM::tLDRspi: case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:  case ARM::tSTRBi:
    return 4;
  }
}
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
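///
/// An illustrative sketch (not from the original source): once
/// "ldm r0!, {r1, r2}" is formed, r0 has been advanced by 8, so a later
/// "ldr r3, [r0, #8]" is rewritten to "ldr r3, [r0]"; the first use that
/// cannot be rewritten instead gets a compensating "subs r0, #8" inserted
/// before it.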
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = MBBI->getOperand(0).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
        .addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
      .addImm(Pred).addReg(PredReg);
  }
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
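///
/// An illustrative sketch (not from a specific test case): with Base = r0,
/// Offset = 0 and Regs = {r1, r2, r3}, the loads
///
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]
///
/// are replaced by a single "ldmia r0, {r1, r2, r3}".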
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
     MachineBasicBlock::LQR_Dead);
  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1)
    for (unsigned I = 0; I < NumRegs; ++I)
      if (Base == Regs[I].first) {
        assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
        if (Opcode == ARM::tLDRi) {
          Writeback = false;
          break;
        } else if (Opcode == ARM::tSTRi) {
          return false;
        }
      }
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return false;
    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.
    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return false;
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(BaseKill));
        } else
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(BaseKill))
            .addImm(Pred).addReg(PredReg);

        // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
        Base = NewBase;
        BaseKill = false;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return false;
  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}
/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between
/// RangeBegin and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
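///
/// An illustrative sketch (not from the original source): two VLDRS into s0
/// and s1 may carry an implicit def of the containing d0; if an instruction
/// between them reads d0, that use is of our implicit def and must be marked
/// undef when the merge moves the definition down to the final VLDM position.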
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd, any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually use
  // our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // List is sorted by position so once we've found one out of range there
    // will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }
  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // undef flags on those uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
       I != E; ++I)
    (*I)->setIsUndef();
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }

  // Update memOps offsets, since they may have been modified by MergeOps.
  for (auto &MemOp : memOps) {
    MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
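///
/// An illustrative sketch (not from a specific test case): for the queue
/// { [r0], [r0, #4], [r0, #12] } the first two entries are contiguous and
/// merge into one ldm/stm; the third entry breaks the run, so MergeOpsUpdate
/// is called for the prefix and MergeLDR_STR recurses on the remainder.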
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;
  bool BaseKill = false;
  // vldm / vstm limit are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default:
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  }
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with a odd register num
        // because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);

      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
                     BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position) {
      insertAfter = i;
      Loc = MemOps[i].MBBI;
    }
  }

  BaseKill = Loc->killsRegister(Base);
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    break;
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
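
// An illustrative example (not from the original source): with Base = r0 and
// Bytes = 8, an ARM-mode "sub r0, r0, #8" whose predicate matches is a
// matching decrement; for tSUBspi/tSUBi8 the immediate is scaled by 4 first
// (note the FIXME on Scale above), so an immediate of 2 also matches 8 bytes.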
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    break;
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:   case ARM::STRi12:
  case ARM::tLDRi:    case ARM::tSTRi:
  case ARM::tLDRspi:  case ARM::tSTRspi:
  case ARM::t2LDRi8:  case ARM::t2LDRi12:
  case ARM::t2STRi8:  case ARM::t2STRi12:
  case ARM::VLDRS:    case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:    case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:    case ARM::LDMDA:   case ARM::LDMDB:  case ARM::LDMIB:
  case ARM::STMIA:    case ARM::STMDA:   case ARM::STMDB:  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:  case ARM::t2LDMDB:
  case ARM::t2STMIA:  case ARM::t2STMDB:
  case ARM::VLDMSIA:  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
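
// Illustrative arithmetic for getLSMultipleTransferSize (above): an
// "ldmia r0, {r4, r5, r6}" moves 3 registers * 4 bytes = 12 bytes, and a
// two-register VLDMDIA moves 2 * 8 = 16 bytes; a single word load or store
// reports 4 bytes.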
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA: case ARM::LDMDA: case ARM::LDMDB: case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA: case ARM::STMDA: case ARM::STMDB: case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA: case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA: case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;
  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }
  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12: return ARM::LDR_PRE_IMM;
  case ARM::STRi12: return ARM::STR_PRE_IMM;
  case ARM::VLDRS: return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD: return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS: return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD: return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12: return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12: return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12: return ARM::LDR_POST_IMM;
  case ARM::STRi12: return ARM::STR_POST_IMM;
  case ARM::VLDRS: return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD: return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS: return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD: return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12: return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12: return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
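///
/// An illustrative example:
///
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4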
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }
  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;
    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
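///
/// An illustrative sketch: the chain below is first merged into an ldm, and
/// the trailing base update is then folded into a writeback form (see
/// MergeBaseUpdateLSMultiple above):
///
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]
///   add r0, r0, #12
/// =>
///   ldmia r0!, {r1, r2, r3}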
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write after write(WAW) dependence. The same as
      // str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }
    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// load / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
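///
/// An illustrative sketch (not from a specific test case): moving the second
/// load up past an unrelated instruction exposes the pair to later merging:
///
///   ldr r1, [r0]
///   add r3, r3, r1
///   ldr r2, [r0, #4]
/// =>
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   add r3, r3, r1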
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
} // end anonymous namespace
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // if available.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
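
// An illustrative example for CanFormLdStDWord (below), not from the original
// source: on a v5TE+ target,
//   ldr r0, [r2]
//   ldr r1, [r2, #4]
// can become "ldrd r0, r1, [r2]" provided the base address is sufficiently
// aligned, the combined offset fits the encoding, and the two destination
// registers are distinct.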
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }
1988 // Make sure the base address satisfies i64 ld / st alignment requirement.
1989 // At the moment, we ignore the memoryoperand's value.
1990 // If we want to use AliasAnalysis, we should check it accordingly.
1991 if (!Op0->hasOneMemOperand() ||
1992 (*Op0->memoperands_begin())->isVolatile())
1995 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1996 const Function *Func = MF->getFunction();
1997 unsigned ReqAlign = STI->hasV6Ops()
1998 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1999 : 8; // Pre-v6 need 8-byte align
2000 if (Align < ReqAlign)
2003 // Then make sure the immediate offset fits.
2004 int OffImm = getMemoryOpOffset(Op0);
2006 int Limit = (1 << 8) * Scale;
2007 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2011 ARM_AM::AddrOpc AddSub = ARM_AM::add;
2013 AddSub = ARM_AM::sub;
2016 int Limit = (1 << 8) * Scale;
2017 if (OffImm >= Limit || (OffImm & (Scale-1)))
2019 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
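    // For ARM-mode LDRD/STRD the offset is kept in addressing-mode-3 form:
    // an add/sub flag plus an 8-bit magnitude, so e.g. an offset of -4
    // (illustrative value) is packed as (sub, 4) by getAM3Opc above.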
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
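// Note that in ARM mode LDRD/STRD want the destination registers to be an
// even/odd pair (Rt even, Rt2 == Rt + 1). Since this pass runs before register
// allocation, RescheduleOps below only records RegPairEven/RegPairOdd
// allocation hints and leaves it to the allocator to honour them. A minimal
// illustration (made-up registers, sufficiently aligned base):
//   ldr r0, [r2]
//   ldr r1, [r2, #4]
// can become
//   ldrd r0, r1, [r2]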
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });
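  // After the sort, Ops is ordered by decreasing offset, so Ops.back() is the
  // access with the lowest offset; the scan below walks the vector from the
  // back and therefore sees offsets in increasing order.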
  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }
    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;
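        // Loads are hoisted up to where the first load of the group was;
        // stores are sunk down to the last store. InsertPos is then advanced
        // past the group members (and any debug values) so the rescheduled
        // instructions end up contiguous.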
        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
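  // Two nested loops: the inner loop numbers every instruction in the block
  // (MI2LocMap) and buckets load / store instructions by base register; it
  // stops at call / terminator barriers or when it sees a base+offset
  // combination for the second time. The outer loop then reschedules whatever
  // has been collected so far and resumes scanning from the stopping point.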
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);
      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }
      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}