//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    ++NumLDMGened;
    return ARM::LDM;
  case ARM::STR:
    ++NumSTMGened;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    return ARM::t2STM;
  case ARM::VLDRS:
    ++NumVLDMGened;
    return ARM::VLDMS;
  case ARM::VSTRS:
    ++NumVSTMGened;
    return ARM::VSTMS;
  case ARM::VLDRD:
    ++NumVLDMGened;
    return ARM::VLDMD;
  case ARM::VSTRD:
    ++NumVSTMGened;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
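/// For instance (a sketch), with Base = r0, Offset = 0, and
/// Regs = {(r4, kill), (r5, kill)}, the emitted instruction would be:
///   ldmia r0, {r4, r5}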
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                         SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;

  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
      else {
        KilledRegs.insert(Reg);
        Killer[Reg] = i;
      }
    }
  }
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // Register is killed before and it's not easy / possible to update the
      // kill marker on already merged instructions. Abort.
      return;

    // If we are inserting the merged operation after an unmerged operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
        memOps[j].isKill = false;
      }
    }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
  case ARM::VLDMS:
  case ARM::VSTMS:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMD:
  case ARM::VSTMD:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
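// For the load / store multiple cases above, the size is derived from the
// number of register-list operands: 4 bytes per register transferred, or 8
// for the double-precision VLDMD / VSTMD forms. E.g. "ldmia r0, {r1, r2, r3}"
// transfers 12 bytes.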
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
    if (MI->getOperand(i).getReg() == Base)
      return false;
  }
  Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // If NextMBBI is I, advance I past it since it is going to be erased.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(ARM_AM::getAM4ModeImm(Mode))
    .addImm(Pred).addReg(PredReg);
  // Transfer the rest of operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
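///
/// For example (a sketch):
///   ldr rd, [rn]
///   rn := rn + 4
/// =>
///   ldr rd, [rn], #4
///
/// A preceding increment / decrement is folded into the pre-indexed form
/// instead.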
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
                                   ARM_AM::db : ARM_AM::ia);
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
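// For example (a sketch), "ldr r0, [r1, #-8]" decodes to an offset of -8,
// while the AM5 (VLDR / VSTR) offset field counts 4-byte units, hence the
// "* 4" above.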
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
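///
/// For example (a sketch), hoisting the second load next to the first makes
/// later ldrd / ldm formation possible:
///
///   ldr r1, [r0, #4]
///   <unrelated instructions>
///   ldr r2, [r0, #8]
/// =>
///   ldr r1, [r0, #4]
///   ldr r2, [r0, #8]
///   <unrelated instructions>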
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // It's not safe to move the other ldr(s) / str(s) across it, so stop
        // right here.
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}