//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
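///
/// For example (register names here are purely illustrative), a sequence
/// such as:
///
///   ldr r1, [r0]
///   ldr r2, [r0, #4]
///   ldr r3, [r0, #8]
///
/// can be rewritten as:
///
///   ldmia r0, {r1, r2, r3}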
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &memOps,
                        unsigned memOpsBegin, unsigned memOpsEnd,
                        unsigned insertAfter, int Offset,
                        unsigned Base, bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
}
char ARMLoadStoreOpt::ID = 0;
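
/// getLoadStoreMultipleOpcode - Return the load / store multiple opcode that
/// corresponds to the given single load / store opcode (and bump the matching
/// statistics counter).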
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
/// MergeOps - Create and insert an LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase = 0;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is always killed right after its use.
  }
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
// MergeOpsUpdate - Call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred,
                                     unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;

  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
      else
        KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // Register is killed before and it's not easy / possible to update the
      // kill marker on already merged instructions. Abort.
      return;
    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
        memOps[j].isKill = false;
      }
    }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);
  unsigned Count = 1;
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
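
/// isMatchingDecrement - Return true if MI is a SUB that decrements Base by
/// Bytes in place (same source and destination register), with a matching
/// predicate, so that it can be folded into a base-updating load / store.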
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
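
/// isMatchingIncrement - Return true if MI is an ADD that increments Base by
/// Bytes in place, with a matching predicate, so that it can be folded into a
/// base-updating load / store.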
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
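
/// getLSMultipleTransferSize - Return the number of bytes transferred by the
/// given memory instruction; for a load / store multiple this is the number
/// of transferred registers times the register size.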
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
    return (MI->getNumOperands() - 4) * 4;
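
/// getUpdatingLSMultipleOpcode - Return the base-updating (writeback) form of
/// the given load / store multiple opcode.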
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).getReg() == Base)
      return false;
  }
  Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode))
    .addImm(Pred).addReg(PredReg);
  // Transfer the rest of operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
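
/// getPreIndexedLoadStoreOpcode - Return the pre-indexed form of the given
/// load / store opcode. VLDR / VSTR have no pre-indexed form, so they map to
/// the updating load / store multiple opcodes instead.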
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
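
/// getPostIndexedLoadStoreOpcode - Return the post-indexed form of the given
/// load / store opcode; as above, VLDR / VSTR map to the updating load /
/// store multiple opcodes.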
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  int Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
                                   ARM_AM::db : ARM_AM::ia);
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -(int)Bytes : (int)Bytes;
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST,
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if the instruction is a memory operation (that
/// this pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
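
/// getMemoryOpOffset - Return the signed byte offset encoded in the
/// addressing-mode immediate operand of the given load / store.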
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
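
/// InsertLDR_STR - Emit a single load or store with the given registers,
/// offset, and kill / undef flags; used when splitting an invalid LDRD / STRD
/// into two instructions.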
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
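
/// FixInvalidRegPairOp - LDRD / STRD require an even / odd register pair. If
/// the pair is not valid, rewrite the instruction as an LDM / STM (ascending
/// registers, no offset) or as two single loads / stores.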
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;
    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load, so make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }
        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++MBBI;
      ++Position;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op, skip
      // it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
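
// OffsetCompare - Order loads / stores by descending memory offset; used to
// sort same-base memory ops before rescheduling them.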
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre-register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
}
char ARMPreAllocLoadStoreOpt::ID = 0;
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
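
/// IsSafeAndProfitableToMove - Return true if the memory ops in MemOps can be
/// moved together: no call, terminator, or conflicting memory access between
/// I and E, no intervening def of Base, and an acceptable increase in
/// register pressure.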
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
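
/// CanFormLdStDWord - Return true if Op0 and Op1 (two loads or two stores off
/// the same base register) can be combined into an LDRD / STRD, filling in
/// the new opcode, registers, and encoded offset.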
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }

  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
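
/// RescheduleOps - Move a group of same-base loads / stores so they become
/// adjacent, forming an LDRD / STRD when exactly two of them qualify; this
/// makes it more likely the post-RA pass can merge them.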
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }
    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
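
/// RescheduleLoadStoreInstrs - Scan the block between barriers, group loads
/// and stores by base register, and reschedule each group that has more than
/// one member.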
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // It means the loads / stores are not to be merged in this block;
        // merging them with an unpredictable pattern may make the code worse.
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - Returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}