//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::FLDS:
    NumFLDMGened++;
    return ARM::FLDMS;
  case ARM::FSTS:
    NumFSTMGened++;
    return ARM::FSTMS;
  case ARM::FLDD:
    NumFLDMGened++;
    return ARM::FLDMD;
  case ARM::FSTD:
    NumFSTMGened++;
    return ARM::FSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
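/// For example (ia sub-mode, ascending register numbers, r0 as the base):
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
/// =>
///   ldmia r0, {r1, r2}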
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase = 0;
    if (isi32Load(Opcode))
      // If it is a load, then use one of the destination registers as the
      // new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is always killed right after its use.
  }

  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  unsigned PReg = Loc->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = Loc->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, dl, Regs)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, dl, Regs)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }

  return;
}

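/// isMatchingDecrement - Return true if MI is an instruction of the form
/// "sub rN, rN, #Bytes" (under the same predicate), i.e. a decrement of Base
/// that a later transformation could fold away as base writeback.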
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

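/// getLSMultipleTransferSize - Return the number of bytes a (potential) load /
/// store multiple instruction transfers, e.g. 12 for "ldmia r0, {r1, r2, r3}".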
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::FLDS:
  case ARM::FSTS:
    return 4;
  case ARM::FLDD:
  case ARM::FSTD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  default:
    return 0;
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
               Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
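  // VFP has no pre-indexed single load / store; a one-register FLDM / FSTM
  // with base writeback is used instead.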
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
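/// For example:
///   ldr r1, [r0]
///   add r0, r0, #4
/// =>
///   ldr r1, [r0], #4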
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
               Opcode == ARM::FSTD || Opcode == ARM::FSTS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isLd) {
    if (isAM5)
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if the instruction is a memory operation that
/// this pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isReg();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

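/// getMemoryOpOffset - Return the signed byte offset encoded in a load /
/// store, e.g. -4 for "ldr r0, [r1, #-4]".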
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill,
                          unsigned BaseReg, bool BaseKill,
                          unsigned OffReg, bool OffKill,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII) {
  unsigned Offset;
  if (OffImm < 0)
    Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
  else
    Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  if (isDef)
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg, getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
  else
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg, getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
}

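/// FixInvalidRegPairOp - An LDRD / STRD needs its transfer registers to form
/// an even / odd pair (e.g. r0/r1). If an instruction was formed with an
/// illegal pair, expand it back into an equivalent LDM / STM or a pair of
/// LDR / STR instructions, e.g.:
///   ldrd r1, r2, [r0]
/// =>
///   ldmia r0, {r1, r2}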
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isLd = Opcode == ARM::LDRD;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    const MachineOperand &OffOp = MI->getOperand(3);
    unsigned OffReg = OffOp.getReg();
    bool OffKill = OffOp.isKill();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM;
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getKillRegState(EvenDeadKill))
          .addReg(OddReg, getKillRegState(OddDeadKill));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR;
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill,
                      BaseReg, false, OffReg, false, Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill,
                      BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII);
      } else {
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, BaseReg, false, OffReg, false,
                      Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill,
                      Pred, PredReg, TII);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
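/// For example:
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
/// =>
///   ldmia r0, {r4, r5, r6}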
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block. Try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register. Make sure it's a call clobbered register or
        // a spilled callee-saved register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
        if (Scratch == 0)
          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
                                      AFI->getSpilledCSRegisters());
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load / store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into PC.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}

/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
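/// For example:
///   ldr r1, [r0]
///   add r7, r7, #1
///   ldr r2, [r0, #4]
/// =>
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   add r7, r7, #1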
namespace {
  struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, unsigned &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

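/// CanFormLdStDWord - Return true if a pair of i32 loads / stores off the
/// same base can be combined into a single LDRD / STRD, checking alignment,
/// offset range, and that the two transfer registers are distinct.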
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, unsigned &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred) {
  // FIXME: FLDS / FSTS -> FLDD / FSTD
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !Op0->memoperands_begin()->getValue() ||
      Op0->memoperands_begin()->isVolatile())
    return false;

  unsigned Align = Op0->memoperands_begin()->getAlignment();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 needs 8-byte align.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  if (OffImm < 0) {
    AddSub = ARM_AM::sub;
    OffImm = -OffImm;
  }
  if (OffImm >= 256) // 8 bits
    return false;
  Offset = ARM_AM::getAM3Opc(AddSub, OffImm);

  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

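/// RescheduleOps - Move loads / stores of the same base register and
/// consecutive offsets closer together, and try to form an LDRD / STRD when
/// exactly two of them qualify, so later passes can merge more of them.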
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        unsigned NewOpc = 0;
        unsigned Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg).addReg(0).addImm(Offset)
              .addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg).addReg(0).addImm(Offset)
              .addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opcode = MI->getOpcode();
      bool isLd = Opcode == ARM::LDR ||
                  Opcode == ARM::FLDS || Opcode == ARM::FLDD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}