//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
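/// For instance (illustrative only, register names are hypothetical):
///   ldr r3, [r0]
///   ldr r4, [r0, #4]    =>    ldmia r0, {r3, r4, r5}
///   ldr r5, [r0, #8]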

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);
    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry, 8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
} // end anonymous namespace
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::VLDRS:
    NumVLDMGened++;
    return ARM::VLDMS;
  case ARM::VSTRS:
    NumVSTMGened++;
    return ARM::VSTMS;
  case ARM::VLDRD:
    NumVLDMGened++;
    return ARM::VLDMD;
  case ARM::VSTRD:
    NumVSTMGened++;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
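/// A minimal sketch of the intent (hypothetical registers): with Base = r4,
/// Offset = 0 and Regs = {r0, r1} this emits
///   ldmia r4, {r0, r1}
/// while a starting Offset of 4 selects the ib sub-mode instead (ARM mode
/// only; Thumb2 has no ldmib / stmib):
///   ldmib r4, {r0, r1}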
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base. But only do so if it is cost effective, i.e. merging more
    // than two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.
    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // New base is always killed right after its use.
  }

  bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  MIB.addReg(0); // Add optional writeback (0 for now).
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
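/// Illustrative sketch (hypothetical registers): the register numbers must
/// ascend with the offsets, so
///   ldr r1, [r4]
///   ldr r2, [r4, #4]    =>    ldmia r4, {r1, r2}
/// merges, whereas r2 loaded from [r4] and r1 from [r4, #4] would be split
/// rather than merged.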
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                         SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  unsigned PReg = Loc->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = Loc->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, dl, Regs)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, dl, Regs)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRD:
  case ARM::STRD:
  case ARM::t2LDRDi8:
  case ARM::t2STRDi8:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 5) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
    Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }
    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }
    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS;
  case ARM::VLDRD: return ARM::VLDMD;
  case ARM::VSTRS: return ARM::VSTMS;
  case ARM::VSTRD: return ARM::VSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/VLDR{D|S}/VSTR{D|S} op when possible:
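/// A minimal sketch of the intended folds (hypothetical registers):
///   ldr r3, [r5]
///   add r5, r5, #4      =>    ldr r3, [r5], #4
/// and
///   sub r5, r5, #4
///   ldr r3, [r5]        =>    ldr r3, [r5, #-4]!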
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
    Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
  int Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -(int)Bytes : (int)Bytes;
  if (isLd) {
    if (isAM5)
      // VLDMS, VLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // VSTMS, VSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0) // No writeback.
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0) // No writeback.
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
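/// For example (illustrative, hypothetical registers):
///   str r1, [r0]
///   str r2, [r0, #4]    =>    stmia r0, {r1, r2, r3}
///   str r3, [r0, #8]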
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);
        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load / store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load / store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
} // end anonymous namespace
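// Note: OffsetCompare sorts in descending offset order, so after a std::sort
// with it the op with the lowest offset ends up at Ops.back(). RescheduleOps
// below relies on this when it picks Op0 = Ops.back() as the lower-addressed
// half of a candidate LDRD / STRD pair.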
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
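/// For instance (illustrative only, hypothetical registers and scheduling):
///   ldr r1, [r0]
///   mul r4, r5, r6
///   ldr r2, [r0, #4]
/// is rescheduled so the two loads become adjacent, making a later ldrd or
/// ldm formation possible.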

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre-register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
} // end anonymous namespace
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down, e.g.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
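  // Illustrative arithmetic for the heuristic below: when more than four
  // values are kept live by the moved ops (say MemRegs.size() == 5), the
  // move is still considered profitable as long as it extends the live
  // ranges of at most 10 other registers across the moved range.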
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;
  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;
  // Make sure the base address satisfies the i64 load / store alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8; // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;
  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
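  // Worked example for the limit checks above (illustrative): in the Thumb2
  // path Scale is 4, so Limit = (1 << 8) * 4 = 1024 and a non-negative offset
  // must be a multiple of 4 no larger than 1020; in the ARM AM3 path Scale
  // is 1, leaving the plain 8-bit range [0, 255].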
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                     DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;
        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();
          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);
          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }
      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
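
// Typical use from the ARM target's pass pipeline (an illustrative sketch;
// the exact setup hooks in ARMTargetMachine may differ):
//   PM.add(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));  // pre-RA
//   PM.add(createARMLoadStoreOptimizationPass(/*PreAlloc=*/false)); // post-RA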