//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
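///
/// For example (an illustrative sequence, not from a specific test case):
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// is rewritten into the single load-multiple:
///   ldmia r4, {r0, r1, r2}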
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    class UnitRegsMap {
    public:
      UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {}
      const SmallVector<unsigned, 4>& operator[](unsigned Reg) {
        DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found =
          Cache.find(Reg);
        if (found != Cache.end())
          return found->second;
        return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg)))
                 .first->second;
      }
    private:
      /// Return the unit (i.e. smallest) registers that compose Reg.
      SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) {
        SmallVector<unsigned, 4> Res;

        const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg);
        if (TRC == &ARM::QPRRegClass) {
          if (Reg > ARM::Q7) {
            Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0));
            Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1));
            return Res;
          }

          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2));
          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3));

          return Res;
        }

        if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) {
          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));

          return Res;
        }

        Res.push_back(Reg);

        return Res;
      }

      const TargetRegisterInfo* TRI;
      DenseMap<unsigned, SmallVector<unsigned, 4> > Cache;
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &memOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    unsigned AddMemOp(MemOpQueue& MemOps,
                      const MemOpQueueEntry newEntry,
                      UnitRegsMap& UnitRegsInfo,
                      SmallSet<unsigned, 4>& UsedUnitRegs,
                      unsigned At = -1U);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
} // end anonymous namespace
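// getLoadStoreMultipleOpcode maps a plain load / store opcode plus an
// addressing submode onto the corresponding load / store multiple opcode;
// e.g. ARM::LDRi12 with ARM_AM::ia yields ARM::LDMIA. A result of 0 means no
// non-writeback multiple form exists for that combination.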
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
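///
/// For instance (illustrative): with four consecutive word slots and
/// Offset == -16 the merge uses the DB (decrement-before) submode, since
/// -4 * NumRegs == -16; with Offset == 4 on a non-VFP, non-Thumb2 opcode it
/// uses IB (increment-before).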
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase = 0;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Otherwise use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // The new base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
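///
/// For example (illustrative), given
///   ldr r2, [r9]
///   ldr r3, [r9, #4]
///   ldr r1, [r9, #8]
/// the first two are merged into "ldmia r9, {r2, r3}", while the third starts
/// a new candidate sequence, because register encodings must be ascending.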
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }
  return false;
}
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
  // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
  // fallthrough
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // If the erased instruction is the one the caller's iterator points at,
      // advance the iterator first so it stays valid.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
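///
/// For example (illustrative):
///   ldr r0, [r1]
///   add r1, r1, #4
/// becomes the post-indexed load
///   ldr r0, [r1], #4
/// and
///   sub r1, r1, #4
///   ldr r0, [r1]
/// becomes the pre-indexed load
///   ldr r0, [r1, #-4]!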
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
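/// getMemoryOpOffset - Return the signed byte offset encoded in MI's
/// immediate operand; e.g. (illustrative) -8 for "ldr r0, [r1, #-8]" and 4
/// for "vldr s0, [r1, #4]" (AM5 offsets are stored in words, hence the
/// scaling by 4 below).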
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
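    // For example (illustrative), "ldrd r0, r1, [r0]" on Cortex-M3 must be
    // split, since an interrupt between the two word transfers can leave the
    // base register r0 already overwritten by the first loaded word.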
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldmia / stmia.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti.
/// Store memops are added with a plain push_back/insert, without any
/// additional logic.
/// For a load operation it does the following:
/// 1. Adds the new load operation into the MemOps collection at position "At".
/// 2. Removes from MemOps any earlier "load" operations that change the
///    contents of "Reg" prior to "At".
/// UnitRegsInfo - map of type Map< Register, UnitRegisters-vector >.
/// UsedUnitRegs - set of unit-registers currently in use.
/// At - the position at which the entry will be added, and prior to which the
/// clean-up should be made (for a load operation).
/// FIXME: The clean-up should also be done for store operations, but the
/// memory address should be analyzed instead of unit registers.
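///
/// For example (a rough illustration): if MemOps already holds a load into r2
/// and another load into r2 is added at a later offset, the earlier entry is
/// dropped from the queue and its unit registers are released, since its
/// result is overwritten before the merged instruction would execute.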
unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps,
                                   const MemOpQueueEntry NewEntry,
                                   UnitRegsMap& UnitRegsInfo,
                                   SmallSet<unsigned, 4>& UsedUnitRegs,
                                   unsigned At) {
  unsigned Cleaned = 0;

  if (At == -1U) {
    At = MemOps.size();
    MemOps.push_back(NewEntry);
  } else
    MemOps.insert(&MemOps[At], NewEntry);

  // If the operation is not a load, leave it as-is for now,
  // so zero overridden ops are cleaned up in this case.
  if (!NewEntry.MBBI->mayLoad())
    return Cleaned;

  const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg];

  bool FoundOverriddenLoads = false;

  for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i)
    if (UsedUnitRegs.count(NewEntryUnitRegs[i])) {
      FoundOverriddenLoads = true;
      break;
    }

  // If we detect that this register is used by load operations that are
  // predecessors of the new one, remove them from MemOps.
  if (FoundOverriddenLoads) {
    MemOpQueue UpdatedMemOps;

    // Scan through MemOps entries.
    for (unsigned i = 0; i != At; ++i) {
      MemOpQueueEntry& MemOpEntry = MemOps[i];

      // FIXME: Skip non-load operations for now.
      if (!MemOpEntry.MBBI->mayLoad())
        continue;

      const SmallVector<unsigned, 4>& MemOpUnitRegs =
        UnitRegsInfo[MemOpEntry.Reg];

      // Look up an entry that loads contents into a register used by the new
      // entry.
      bool ReleaseThisEntry = false;
      for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) {
        if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(),
                      MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) {
          ReleaseThisEntry = true;
          ++Cleaned;
          break;
        }
      }

      if (ReleaseThisEntry) {
        const SmallVector<unsigned, 4>& ReleasedRegs =
          UnitRegsInfo[MemOpEntry.Reg];
        for (unsigned r = 0, er = ReleasedRegs.size(); r != er; ++r)
          UsedUnitRegs.erase(ReleasedRegs[r]);
      } else
        UpdatedMemOps.push_back(MemOpEntry);
    }

    // Keep anything without changes after the At position.
    for (unsigned i = At, e = MemOps.size(); i != e; ++i)
      UpdatedMemOps.push_back(MemOps[i]);

    MemOps.swap(UpdatedMemOps);
  }

  UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end());

  return Cleaned;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
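///
/// For example (illustrative):
///   str r0, [r5]
///   str r1, [r5, #4]
/// =>
///   stmia r5, {r0, r1}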
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  UnitRegsMap UnitRegsInfo(TRI);
  SmallSet<unsigned, 4> UsedRegUnits;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;

        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;

        const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg];
        UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end());

        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Clobber = false;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            unsigned OverridesCleaned =
              AddMemOp(MemOps,
                       MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI),
                       UnitRegsInfo, UsedRegUnits) != 0;
            NumMemOps += 1 - OverridesCleaned;
            Advance = true;
          } else {
            for (unsigned I = 0; I != NumMemOps; ++I) {
              if (Offset < MemOps[I].Offset) {
                MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI);
                unsigned OverridesCleaned =
                  AddMemOp(MemOps, entry, UnitRegsInfo,
                           UsedRegUnits, I) != 0;
                NumMemOps += 1 - OverridesCleaned;
                Advance = true;
                break;
              } else if (Offset == MemOps[I].Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load / store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      MemOps.clear();
      NumMemOps = 0;
      UsedRegUnits.clear();
    }

    // If iterator hasn't been advanced and this is not a memory op, skip it.
    // It can't start a new chain anyway.
    if (!Advance && !isMemOp && MBBI != E) {
      ++Position;
      ++MBBI;
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
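///
/// For example (illustrative): if "ldr r2, [r0, #4]" and "ldr r3, [r0]" are
/// separated by unrelated instructions, moving them next to each other (and
/// hinting r2/r3 toward an even/odd pair) lets the post-RA pass, or the LDRD
/// formation below, combine them.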
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
} // end anonymous namespace
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getDataLayout();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // if possible.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r2, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
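  // Illustrative ranges implied by the checks above: t2LDRDi8/t2STRDi8 take
  // offsets in (-1024, 1024) in multiples of 4, while ARM-mode LDRD/STRD use
  // addrmode3 with an offset magnitude below 256.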
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
} // end anonymous namespace
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
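/// (Illustrative use: the ARM target's pass setup calls this with
/// PreAlloc=true before register allocation and with PreAlloc=false for the
/// post-RA run; the flag selects which of the two passes above is created.)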
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}