1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
// Pass-level statistics counters (reported with -stats).
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMLoadStoreOpt - Post-register-allocation pass that combines
55 /// load / store instructions to form ldm / stm instructions.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
// MemOpQueueEntry - One load/store candidate queued for merging: its
// immediate offset, transfer register, kill state, position within the
// basic block, and an iterator to the instruction itself.
75 struct MemOpQueueEntry {
80 MachineBasicBlock::iterator MBBI;
82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
83 MachineBasicBlock::iterator i)
84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
87 typedef MemOpQueue::iterator MemOpQueueIter;
// Worker routines; see the definitions below for details.
89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
90 int Offset, unsigned Base, bool BaseKill, int Opcode,
91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
93 void MergeOpsUpdate(MachineBasicBlock &MBB,
102 ARMCC::CondCodes Pred,
106 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
108 int Opcode, unsigned Size,
109 ARMCC::CondCodes Pred, unsigned PredReg,
110 unsigned Scratch, MemOpQueue &MemOps,
111 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator &MBBI);
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
117 MachineBasicBlock::iterator MBBI,
118 const TargetInstrInfo *TII,
120 MachineBasicBlock::iterator &I);
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
124 MachineBasicBlock::iterator &I);
125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification, replacement for typeid.
128 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single load/store opcode to the
// corresponding load/store-multiple opcode. Aborts on opcodes this pass
// does not handle.
131 static int getLoadStoreMultipleOpcode(int Opcode) {
159 default: llvm_unreachable("Unhandled opcode!");
// isT2i32Load - True for the Thumb2 32-bit integer load forms (imm12/imm8).
164 static bool isT2i32Load(unsigned Opc) {
165 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// isi32Load - True for any 32-bit integer load (ARM LDRi12 or Thumb2 forms).
168 static bool isi32Load(unsigned Opc) {
169 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
// isT2i32Store - True for the Thumb2 32-bit integer store forms (imm12/imm8).
172 static bool isT2i32Store(unsigned Opc) {
173 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// isi32Store - True for any 32-bit integer store (ARM STRi12 or Thumb2 forms).
176 static bool isi32Store(unsigned Opc) {
177 return Opc == ARM::STRi12 || isT2i32Store(Opc);
180 /// MergeOps - Create and insert a LDM or STM with Base as base register and
181 /// registers in Regs as the register operands that would be loaded / stored.
182 /// It returns true if the transformation is done.
184 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
185 MachineBasicBlock::iterator MBBI,
186 int Offset, unsigned Base, bool BaseKill,
187 int Opcode, ARMCC::CondCodes Pred,
188 unsigned PredReg, unsigned Scratch, DebugLoc dl,
189 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
190 // Only a single register to load / store. Don't bother.
191 unsigned NumRegs = Regs.size();
// Pick the addressing sub-mode (ia/ib/da/db) based on the starting offset.
195 ARM_AM::AMSubMode Mode = ARM_AM::ia;
196 // VFP and Thumb2 do not support IB or DA modes.
197 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
198 bool haveIBAndDA = isNotVFP && !isThumb2;
199 if (Offset == 4 && haveIBAndDA)
201 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
203 else if (Offset == -4 * (int)NumRegs && isNotVFP)
204 // VLDM/VSTM do not support DB mode without also updating the base reg.
206 else if (Offset != 0) {
207 // If starting offset isn't zero, insert a MI to materialize a new base.
208 // But only do so if it is cost effective, i.e. merging more than two
214 if (isi32Load(Opcode))
215 // If it is a load, then just use one of the destination registers
216 // as the new base.
217 NewBase = Regs[NumRegs-1].first;
219 // Use the scratch register to use as a new base.
// Pick ADD vs SUB (and the SP-relative Thumb2 variants) to compute NewBase.
224 int BaseOpc = !isThumb2
226 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
230 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
// The offset must be encodable as a shifter/modified immediate.
233 int ImmedOffset = isThumb2
234 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
235 if (ImmedOffset == -1)
236 // FIXME: Try t2ADDri12 or t2SUBri12?
237 return false; // Probably not worth it then.
239 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
240 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
241 .addImm(Pred).addReg(PredReg).addReg(0);
243 BaseKill = true; // New base is always killed right after its use.
// Loads define their transfer registers; stores kill/use theirs.
246 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
247 Opcode == ARM::VLDRD);
248 Opcode = getLoadStoreMultipleOpcode(Opcode);
249 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
250 .addReg(Base, getKillRegState(BaseKill))
251 .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
252 for (unsigned i = 0; i != NumRegs; ++i)
253 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
254 | getKillRegState(Regs[i].second));
259 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
261 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
263 unsigned memOpsBegin, unsigned memOpsEnd,
264 unsigned insertAfter, int Offset,
265 unsigned Base, bool BaseKill,
267 ARMCC::CondCodes Pred, unsigned PredReg,
270 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
271 // First calculate which of the registers should be killed by the merged
273 const unsigned insertPos = memOps[insertAfter].Position;
// UnavailRegs: regs killed by an already-merged op before the insert point
// (their kill flags cannot be updated). KilledRegs/Killer: regs killed by
// still-unmerged ops, and which queue entry kills them.
275 SmallSet<unsigned, 4> UnavailRegs;
276 SmallSet<unsigned, 4> KilledRegs;
277 DenseMap<unsigned, unsigned> Killer;
278 for (unsigned i = 0; i < memOpsBegin; ++i) {
279 if (memOps[i].Position < insertPos && memOps[i].isKill) {
280 unsigned Reg = memOps[i].Reg;
281 if (memOps[i].Merged)
282 UnavailRegs.insert(Reg);
284 KilledRegs.insert(Reg);
289 for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
290 if (memOps[i].Position < insertPos && memOps[i].isKill) {
291 unsigned Reg = memOps[i].Reg;
292 KilledRegs.insert(Reg);
// Build the (register, isKill) list for the merged instruction.
297 SmallVector<std::pair<unsigned, bool>, 8> Regs;
298 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
299 unsigned Reg = memOps[i].Reg;
300 if (UnavailRegs.count(Reg))
301 // Register is killed before and it's not easy / possible to update the
302 // kill marker on already merged instructions. Abort.
305 // If we are inserting the merged operation after an unmerged operation that
306 // uses the same register, make sure to transfer any kill flag.
307 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
308 Regs.push_back(std::make_pair(Reg, isKill));
311 // Try to do the merge.
312 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
314 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
315 Pred, PredReg, Scratch, dl, Regs))
318 // Merge succeeded, update records.
319 Merges.push_back(prior(Loc));
320 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
321 // Remove kill flags from any unmerged memops that come before insertPos.
322 if (Regs[i-memOpsBegin].second) {
323 unsigned Reg = Regs[i-memOpsBegin].first;
324 if (KilledRegs.count(Reg)) {
325 unsigned j = Killer[Reg];
326 memOps[j].MBBI->getOperand(0).setIsKill(false);
327 memOps[j].isKill = false;
// Erase the merged instruction and mark the queue entry as consumed.
330 MBB.erase(memOps[i].MBBI);
331 memOps[i].Merged = true;
335 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
336 /// load / store multiple instructions.
338 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
339 unsigned Base, int Opcode, unsigned Size,
340 ARMCC::CondCodes Pred, unsigned PredReg,
341 unsigned Scratch, MemOpQueue &MemOps,
342 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
343 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
344 int Offset = MemOps[SIndex].Offset;
345 int SOffset = Offset;
346 unsigned insertAfter = SIndex;
347 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
348 DebugLoc dl = Loc->getDebugLoc();
// PMO/PReg: the previous transfer-register operand, used to enforce the
// ascending register-number requirement below. Undef regs sort last.
349 const MachineOperand &PMO = Loc->getOperand(0);
350 unsigned PReg = PMO.getReg();
351 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
352 : getARMRegisterNumbering(PReg);
// Greedily grow the run of mergeable ops starting at SIndex.
355 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
356 int NewOffset = MemOps[i].Offset;
357 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
358 unsigned Reg = MO.getReg();
359 unsigned RegNum = MO.isUndef() ? UINT_MAX
360 : getARMRegisterNumbering(Reg);
361 // Register numbers must be in ascending order. For VFP, the registers
362 // must also be consecutive and there is a limit of 16 double-word
363 // registers per instruction.
364 if (Reg != ARM::SP &&
365 NewOffset == Offset + (int)Size &&
366 ((isNotVFP && RegNum > PRegNum)
367 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
372 // Can't merge this in. Try merge the earlier ones first.
373 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
374 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Recurse to handle the remainder of the queue starting at i.
375 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
380 if (MemOps[i].Position > MemOps[insertAfter].Position)
// Merge the final run; the base is killed only if its last use is here.
384 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
385 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
386 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// isMatchingDecrement - Return true if MI is "Base = Base - Bytes" with the
// same predicate, i.e. a SUB that can be folded into a base-update form.
// Limit == 0 means no upper bound on Bytes.
390 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
391 unsigned Bytes, unsigned Limit,
392 ARMCC::CondCodes Pred, unsigned PredReg){
393 unsigned MyPredReg = 0;
396 if (MI->getOpcode() != ARM::t2SUBri &&
397 MI->getOpcode() != ARM::t2SUBrSPi &&
398 MI->getOpcode() != ARM::t2SUBrSPi12 &&
399 MI->getOpcode() != ARM::tSUBspi &&
400 MI->getOpcode() != ARM::SUBri)
403 // Make sure the offset fits in 8 bits.
404 if (Bytes == 0 || (Limit && Bytes >= Limit))
// tSUBspi encodes its immediate in words, hence the scale of 4.
407 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
408 return (MI->getOperand(0).getReg() == Base &&
409 MI->getOperand(1).getReg() == Base &&
410 (MI->getOperand(2).getImm()*Scale) == Bytes &&
411 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
412 MyPredReg == PredReg);
// isMatchingIncrement - Return true if MI is "Base = Base + Bytes" with the
// same predicate, i.e. an ADD that can be folded into a base-update form.
// Limit == 0 means no upper bound on Bytes.
415 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
416 unsigned Bytes, unsigned Limit,
417 ARMCC::CondCodes Pred, unsigned PredReg){
418 unsigned MyPredReg = 0;
421 if (MI->getOpcode() != ARM::t2ADDri &&
422 MI->getOpcode() != ARM::t2ADDrSPi &&
423 MI->getOpcode() != ARM::t2ADDrSPi12 &&
424 MI->getOpcode() != ARM::tADDspi &&
425 MI->getOpcode() != ARM::ADDri)
428 if (Bytes == 0 || (Limit && Bytes >= Limit))
429 // Make sure the offset fits in 8 bits.
// tADDspi encodes its immediate in words, hence the scale of 4.
432 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
433 return (MI->getOperand(0).getReg() == Base &&
434 MI->getOperand(1).getReg() == Base &&
435 (MI->getOperand(2).getImm()*Scale) == Bytes &&
436 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
437 MyPredReg == PredReg);
// getLSMultipleTransferSize - Number of bytes transferred by MI. For
// load/store-multiple forms this is (register-list length) * 4 or * 8;
// the register list is the variadic tail beyond the fixed operands.
440 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
441 switch (MI->getOpcode()) {
461 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
464 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
// getUpdatingLSMultipleOpcode - Map a load/store-multiple opcode to its
// base-writeback (_UPD) variant. Aborts on unhandled opcodes.
468 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
470 case ARM::LDM: return ARM::LDM_UPD;
471 case ARM::STM: return ARM::STM_UPD;
472 case ARM::t2LDM: return ARM::t2LDM_UPD;
473 case ARM::t2STM: return ARM::t2STM_UPD;
474 case ARM::VLDMS: return ARM::VLDMS_UPD;
475 case ARM::VLDMD: return ARM::VLDMD_UPD;
476 case ARM::VSTMS: return ARM::VSTMS_UPD;
477 case ARM::VSTMD: return ARM::VSTMD_UPD;
478 default: llvm_unreachable("Unhandled opcode!");
483 /// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
484 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
486 /// stmia rn, <ra, rb, rc>
487 /// rn := rn + 4 * 3;
489 /// stmia rn!, <ra, rb, rc>
491 /// rn := rn - 4 * 3;
492 /// ldmia rn, <ra, rb, rc>
494 /// ldmdb rn!, <ra, rb, rc>
495 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
496 MachineBasicBlock::iterator MBBI,
498 MachineBasicBlock::iterator &I) {
499 MachineInstr *MI = MBBI;
500 unsigned Base = MI->getOperand(0).getReg();
501 bool BaseKill = MI->getOperand(0).isKill();
502 unsigned Bytes = getLSMultipleTransferSize(MI);
503 unsigned PredReg = 0;
504 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
505 int Opcode = MI->getOpcode();
506 DebugLoc dl = MI->getDebugLoc();
508 bool DoMerge = false;
509 ARM_AM::AMSubMode Mode = ARM_AM::ia;
511 // Can't use an updating ld/st if the base register is also a dest
512 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
513 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
514 if (MI->getOperand(i).getReg() == Base)
// Operand 1 holds the AM4 mode immediate of the existing multiple op.
517 Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
519 // Try merging with the previous instruction.
520 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
521 if (MBBI != BeginMBBI) {
522 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
523 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
// A preceding SUB of the same size converts ia -> db / ib -> da.
525 if (Mode == ARM_AM::ia &&
526 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
529 } else if (Mode == ARM_AM::ib &&
530 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
538 // Try merging with the next instruction.
539 MachineBasicBlock::iterator EndMBBI = MBB.end();
540 if (!DoMerge && MBBI != EndMBBI) {
541 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
542 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
// A trailing ADD (for ia/ib) or SUB (for da/db) becomes the writeback.
544 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
545 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
547 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
548 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Rebuild as the _UPD form: extra writeback def of Base up front.
563 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
564 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
565 .addReg(Base, getDefRegState(true)) // WB base register
566 .addReg(Base, getKillRegState(BaseKill))
567 .addImm(ARM_AM::getAM4ModeImm(Mode))
568 .addImm(Pred).addReg(PredReg);
569 // Transfer the rest of operands.
570 for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
571 MIB.addOperand(MI->getOperand(OpNum));
572 // Transfer memoperands.
573 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// getPreIndexedLoadStoreOpcode - Map a load/store opcode to its pre-indexed
// (writeback-before-access) variant. VLDR/VSTR have no pre-indexed form, so
// they map to the updating load/store-multiple opcodes instead.
579 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
581 case ARM::LDRi12: return ARM::LDR_PRE;
582 case ARM::STRi12: return ARM::STR_PRE;
583 case ARM::VLDRS: return ARM::VLDMS_UPD;
584 case ARM::VLDRD: return ARM::VLDMD_UPD;
585 case ARM::VSTRS: return ARM::VSTMS_UPD;
586 case ARM::VSTRD: return ARM::VSTMD_UPD;
589 return ARM::t2LDR_PRE;
592 return ARM::t2STR_PRE;
593 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a load/store opcode to its post-indexed
// (writeback-after-access) variant. VLDR/VSTR have no post-indexed form, so
// they map to the updating load/store-multiple opcodes instead.
598 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
600 case ARM::LDRi12: return ARM::LDR_POST;
601 case ARM::STRi12: return ARM::STR_POST;
602 case ARM::VLDRS: return ARM::VLDMS_UPD;
603 case ARM::VLDRD: return ARM::VLDMD_UPD;
604 case ARM::VSTRS: return ARM::VSTMS_UPD;
605 case ARM::VSTRD: return ARM::VSTMD_UPD;
608 return ARM::t2LDR_POST;
611 return ARM::t2STR_POST;
612 default: llvm_unreachable("Unhandled opcode!");
617 /// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
618 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
619 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
620 MachineBasicBlock::iterator MBBI,
621 const TargetInstrInfo *TII,
623 MachineBasicBlock::iterator &I) {
624 MachineInstr *MI = MBBI;
625 unsigned Base = MI->getOperand(1).getReg();
626 bool BaseKill = MI->getOperand(1).isKill();
627 unsigned Bytes = getLSMultipleTransferSize(MI);
628 int Opcode = MI->getOpcode();
629 DebugLoc dl = MI->getDebugLoc();
630 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
631 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
632 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset accesses can absorb a base update.
633 if (isi32Load(Opcode) || isi32Store(Opcode))
634 if (MI->getOperand(2).getImm() != 0)
636 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
639 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
640 // Can't do the merge if the destination register is the same as the would-be
641 // writeback register.
642 if (isLd && MI->getOperand(0).getReg() == Base)
645 unsigned PredReg = 0;
646 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
647 bool DoMerge = false;
648 ARM_AM::AddrOpc AddSub = ARM_AM::add;
650 // AM2 - 12 bits, thumb2 - 8 bits.
651 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
653 // Try merging with the previous instruction.
654 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
655 if (MBBI != BeginMBBI) {
656 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
657 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
659 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
661 AddSub = ARM_AM::sub;
663 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
667 NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
672 // Try merging with the next instruction.
673 MachineBasicBlock::iterator EndMBBI = MBB.end();
674 if (!DoMerge && MBBI != EndMBBI) {
675 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
676 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
679 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
681 AddSub = ARM_AM::sub;
682 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
686 NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
// Encode the offset/mode operand for the new instruction form.
700 Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
701 ARM_AM::db : ARM_AM::ia);
703 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
705 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
708 // VLDM[SD]_UPD, VSTM[SD]_UPD
709 // (There are no base-updating versions of VLDR/VSTR instructions, but the
710 // updating load/store-multiple instructions can be used with only one
712 MachineOperand &MO = MI->getOperand(0);
713 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
714 .addReg(Base, getDefRegState(true)) // WB base register
715 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
717 .addImm(Pred).addReg(PredReg)
718 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
719 getKillRegState(MO.isKill())));
722 // LDR_PRE, LDR_POST,
723 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
724 .addReg(Base, RegState::Define)
725 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
727 // t2LDR_PRE, t2LDR_POST
728 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
729 .addReg(Base, RegState::Define)
730 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
732 MachineOperand &MO = MI->getOperand(0);
// STR_PRE, STR_POST
735 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
736 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
737 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
739 // t2STR_PRE, t2STR_POST
740 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
741 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
742 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
749 /// isMemoryOp - Returns true if instruction is a memory operation (that this
750 /// pass is capable of operating on).
751 static bool isMemoryOp(const MachineInstr *MI) {
752 // When no memory operands are present, conservatively assume unaligned,
753 // volatile, unfoldable.
754 if (!MI->hasOneMemOperand())
757 const MachineMemOperand *MMO = *MI->memoperands_begin();
759 // Don't touch volatile memory accesses - we may be changing their order.
760 if (MMO->isVolatile())
763 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
765 if (MMO->getAlignment() < 4)
768 // str <undef> could probably be eliminated entirely, but for now we just want
769 // to avoid making a mess of it.
770 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
771 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
772 MI->getOperand(0).isUndef())
775 // Likewise don't mess with references to undefined addresses.
776 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
777 MI->getOperand(1).isUndef())
// Finally, only handle the opcodes this pass knows how to merge, and only
// when the address operand is a plain register.
780 int Opcode = MI->getOpcode();
785 return MI->getOperand(1).isReg();
788 return MI->getOperand(1).isReg();
795 return MI->getOperand(1).isReg();
800 /// AdvanceRS - Advance register scavenger to just before the earliest memory
801 /// op that is being merged.
802 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the queue entry with the smallest Position (earliest in the block).
803 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
804 unsigned Position = MemOps[0].Position;
805 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
806 if (MemOps[i].Position < Position) {
807 Position = MemOps[i].Position;
808 Loc = MemOps[i].MBBI;
812 if (Loc != MBB.begin())
813 RS->forward(prior(Loc));
// getMemoryOpOffset - Extract the signed immediate byte offset from a
// load/store instruction, decoding AM3 (LDRD/STRD) and AM5 (VFP) encodings;
// plain imm12/imm8 forms carry the offset directly.
816 static int getMemoryOpOffset(const MachineInstr *MI) {
817 int Opcode = MI->getOpcode();
818 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
819 unsigned NumOperands = MI->getDesc().getNumOperands();
// The offset immediate is always the third-from-last defined operand.
820 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
822 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
823 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
824 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
825 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
// AM5 offsets are in words, hence the scale of 4.
828 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
829 : ARM_AM::getAM5Offset(OffField) * 4;
831 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
834 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit a single LDR (isDef) or STR with the given register,
// base, offset, predication and kill/undef flag state. Used when splitting
// an LDRD/STRD into two word-sized operations.
840 static void InsertLDR_STR(MachineBasicBlock &MBB,
841 MachineBasicBlock::iterator &MBBI,
842 int Offset, bool isDef,
843 DebugLoc dl, unsigned NewOpc,
844 unsigned Reg, bool RegDeadKill, bool RegUndef,
845 unsigned BaseReg, bool BaseKill, bool BaseUndef,
846 bool OffKill, bool OffUndef,
847 ARMCC::CondCodes Pred, unsigned PredReg,
848 const TargetInstrInfo *TII, bool isT2) {
// Load form: Reg is defined (possibly dead).
850 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
852 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
853 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
854 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: Reg is used (possibly killed/undef).
856 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
858 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
859 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
860 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - LDRD/STRD require an even/odd consecutive register
// pair. If this instruction's pair is invalid, rewrite it as either an
// LDM/STM (ascending regs, zero offset) or two separate LDR/STR ops.
864 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
865 MachineBasicBlock::iterator &MBBI) {
866 MachineInstr *MI = &*MBBI;
867 unsigned Opcode = MI->getOpcode();
868 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
869 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
870 unsigned EvenReg = MI->getOperand(0).getReg();
871 unsigned OddReg = MI->getOperand(1).getReg();
872 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
873 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// A valid pair (even reg followed by the next odd reg) needs no fixup.
874 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
877 MachineBasicBlock::iterator NewBBI = MBBI;
878 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
879 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
880 bool EvenDeadKill = isLd ?
881 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
882 bool EvenUndef = MI->getOperand(0).isUndef();
883 bool OddDeadKill = isLd ?
884 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
885 bool OddUndef = MI->getOperand(1).isUndef();
886 const MachineOperand &BaseOp = MI->getOperand(2);
887 unsigned BaseReg = BaseOp.getReg();
888 bool BaseKill = BaseOp.isKill();
889 bool BaseUndef = BaseOp.isUndef();
890 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
891 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
892 int OffImm = getMemoryOpOffset(MI);
893 unsigned PredReg = 0;
894 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
896 if (OddRegNum > EvenRegNum && OffImm == 0) {
897 // Ascending register numbers and no offset. It's safe to change it to a
899 unsigned NewOpc = (isLd)
900 ? (isT2 ? ARM::t2LDM : ARM::LDM)
901 : (isT2 ? ARM::t2STM : ARM::STM);
903 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
904 .addReg(BaseReg, getKillRegState(BaseKill))
905 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
906 .addImm(Pred).addReg(PredReg)
907 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
908 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store variant: both registers are uses rather than defs.
911 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
912 .addReg(BaseReg, getKillRegState(BaseKill))
913 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
914 .addImm(Pred).addReg(PredReg)
916 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
918 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
921 NewBBI = llvm::prior(MBBI);
923 // Split into two instructions.
924 unsigned NewOpc = (isLd)
925 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
926 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
927 DebugLoc dl = MBBI->getDebugLoc();
928 // If this is a load and base register is killed, it may have been
929 // re-defed by the load, make sure the first load does not clobber it.
931 (BaseKill || OffKill) &&
932 (TRI->regsOverlap(EvenReg, BaseReg))) {
933 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the odd half first so the even half (which overlaps the base)
// is loaded last.
934 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
935 OddReg, OddDeadKill, false,
936 BaseReg, false, BaseUndef, false, OffUndef,
937 Pred, PredReg, TII, isT2);
938 NewBBI = llvm::prior(MBBI);
939 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
940 EvenReg, EvenDeadKill, false,
941 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
942 Pred, PredReg, TII, isT2);
944 if (OddReg == EvenReg && EvenDeadKill) {
945 // If the two source operands are the same, the kill marker is
946 // probably on the first one. e.g.
947 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
948 EvenDeadKill = false;
951 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
952 EvenReg, EvenDeadKill, EvenUndef,
953 BaseReg, false, BaseUndef, false, OffUndef,
954 Pred, PredReg, TII, isT2);
955 NewBBI = llvm::prior(MBBI);
956 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
957 OddReg, OddDeadKill, OddUndef,
958 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
959 Pred, PredReg, TII, isT2);
974 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
975 /// ops of the same base and incrementing offset into LDM / STM ops.
976 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
977 unsigned NumMerges = 0;
978 unsigned NumMemOps = 0;
// Current chain state: base register, opcode, access size, predicate.
980 unsigned CurrBase = 0;
982 unsigned CurrSize = 0;
983 ARMCC::CondCodes CurrPred = ARMCC::AL;
984 unsigned CurrPredReg = 0;
985 unsigned Position = 0;
986 SmallVector<MachineBasicBlock::iterator,4> Merges;
988 RS->enterBasicBlock(&MBB);
989 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
991 if (FixInvalidRegPairOp(MBB, MBBI))
994 bool Advance = false;
995 bool TryMerge = false;
996 bool Clobber = false;
998 bool isMemOp = isMemoryOp(MBBI);
1000 int Opcode = MBBI->getOpcode();
1001 unsigned Size = getLSMultipleTransferSize(MBBI);
1002 const MachineOperand &MO = MBBI->getOperand(0);
1003 unsigned Reg = MO.getReg();
1004 bool isKill = MO.isDef() ? false : MO.isKill();
1005 unsigned Base = MBBI->getOperand(1).getReg();
1006 unsigned PredReg = 0;
1007 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1008 int Offset = getMemoryOpOffset(MBBI);
1011 // r5 := ldr [r5, #4]
1012 // r6 := ldr [r5, #8]
1014 // The second ldr has effectively broken the chain even though it
1015 // looks like the later ldr(s) use the same base register. Try to
1016 // merge the ldr's so far, including this one. But don't try to
1017 // combine the following ldr(s).
1018 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1019 if (CurrBase == 0 && !Clobber) {
1020 // Start of a new chain.
1025 CurrPredReg = PredReg;
1026 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1035 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1036 // No need to match PredReg.
1037 // Continue adding to the queue.
1038 if (Offset > MemOps.back().Offset) {
1039 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise insert in offset-sorted order.
1044 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1046 if (Offset < I->Offset) {
1047 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1052 } else if (Offset == I->Offset) {
1053 // Collision! This can't be merged!
// Debug values never break a chain.
1062 if (MBBI->isDebugValue()) {
1065 // Reach the end of the block, try merging the memory instructions.
1067 } else if (Advance) {
1071 // Reach the end of the block, try merging the memory instructions.
1077 if (NumMemOps > 1) {
1078 // Try to find a free register to use as a new base in case it's needed.
1079 // First advance to the instruction just before the start of the chain.
1080 AdvanceRS(MBB, MemOps);
1081 // Find a scratch register.
1082 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1083 // Process the load / store instructions.
1084 RS->forward(prior(MBBI));
1088 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1089 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1091 // Try folding preceding/trailing base inc/dec into the generated
1093 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1094 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1096 NumMerges += Merges.size();
1098 // Try folding preceding/trailing base inc/dec into those load/store
1099 // that were not merged to form LDM/STM ops.
1100 for (unsigned i = 0; i != NumMemOps; ++i)
1101 if (!MemOps[i].Merged)
1102 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1105 // RS may be pointing to an instruction that's deleted.
1106 RS->skipTo(prior(MBBI));
1107 } else if (NumMemOps == 1) {
1108 // Try folding preceding/trailing base inc/dec into the single
1110 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1112 RS->forward(prior(MBBI));
// Reset chain state for the next run of candidates.
1119 CurrPred = ARMCC::AL;
1126 // If iterator hasn't been advanced and this is not a memory op, skip it.
1127 // It can't start a new chain anyway.
1128 if (!Advance && !isMemOp && MBBI != E) {
1134 return NumMerges > 0;
1138 struct OffsetCompare {
1139 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1140 int LOffset = getMemoryOpOffset(LHS);
1141 int ROffset = getMemoryOpOffset(RHS);
1142 assert(LHS == RHS || LOffset != ROffset);
1143 return LOffset > ROffset;
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   ldmfd sp!, {..., lr}
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  // Only interesting when the block ends in a bare return (ARM/Thumb bx lr
  // or mov pc, lr) and there is at least one instruction before it.
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    // The instruction before the return must be a writeback LDM.
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      // The last register popped must be LR for the fold to apply.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
      // Retarget the LDM to its return-forming variant so it pops into PC.
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      // Carry the return instruction's implicit operands over to the
      // merged instruction.
      PrevMI->copyImplicitOps(&*MBBI);
/// runOnMachineFunction - Entry point for the post-RA pass: cache target
/// hooks, then for each basic block form LDM/STM multiples and fold
/// stack-restore + return sequences.
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  // Scavenger is used to find scratch base registers during merging.
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

  // Cached target hooks and per-function state; initialized in
  // runOnMachineFunction before any rescheduling is done.
  const TargetData *TD;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM pre- register allocation load / store optimization pass";

  // CanFormLdStDWord - Determine whether two loads / stores can be combined
  // into one LDRD/STRD; on success fills in the new opcode and its operands.
  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
  // RescheduleOps - Move same-base loads (or stores) in Ops next to each
  // other when safe and profitable; MI2LocMap gives program order.
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVector<MachineInstr*, 4> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  // RescheduleLoadStoreInstrs - Per-block driver: bucket memory ops by base
  // register and invoke RescheduleOps on each bucket.
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification token (address is what matters, not the value).
char ARMPreAllocLoadStoreOpt::ID = 0;
/// runOnMachineFunction - Entry point for the pre-RA pass: cache target
/// hooks and per-function objects, then reschedule loads / stores in each
/// basic block independently.
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    Modified |= RescheduleLoadStoreInstrs(MFI);
/// IsSafeAndProfitableToMove - Check whether the memory ops in MemOps (all
/// addressing off register Base) can be moved next to each other across the
/// instructions in [I, E) without changing behavior, and whether doing so
/// keeps the estimated register-pressure increase acceptable.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // Registers that become live across the region if we move the ops.
  SmallSet<unsigned, 4> AddedRegPressure;
    // Debug values and the memory ops being moved are ignored.
    if (I->isDebugValue() || MemOps.count(&*I))
    const TargetInstrDesc &TID = I->getDesc();
    // Calls, terminators and side-effecting instructions are barriers.
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
    // An intervening store may alias the loads being moved.
    if (isLd && TID.mayStore())
      // It's not safe to move the first 'str' down.
      // str r4, [r0, #+4]
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      unsigned Reg = MO.getReg();
      // A redefinition of (any register overlapping) the base ends the
      // region we may move across.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
      // Registers other than the base and the transferred values add
      // pressure if the ops are hoisted/sunk past this instruction.
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
  return AddedRegPressure.size() <= MemRegs.size() * 2;
// Decide whether the pair (Op0, Op1) can be rewritten as a single
// LDRD/STRD.  On success the output parameters receive the new opcode,
// destination/source register pair, base register, encoded offset,
// predicate and debug location.
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  // Only immediate-offset i32 loads/stores (ARM and Thumb2 forms) qualify.
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
  else if (Opcode == ARM::STRi12)
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // A single known, non-volatile memory operand is required to reason about
  // alignment and ordering.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
      // Can't fall back to t2LDRi8 / t2STRi8.
    // Offset must fit in the (scaled) 8-bit immediate field.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
    // ARM addrmode3 encodes the sign separately from the magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
      AddSub = ARM_AM::sub;
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);

  // Report the transfer registers; LDRD/STRD needs two distinct registers.
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
/// RescheduleOps - Given the loads (or stores) in Ops that share base
/// register Base, find runs with consecutive offsets and move them next to
/// each other (loads up, stores down) so the post-RA pass can merge them.
/// MI2LocMap maps each instruction to its position in program order.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  while (Ops.size() > 1) {
    // Track the program-order extent and shape of the candidate run.
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Ops is sorted by descending offset, so iterate backwards to visit
    // ascending offsets.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
      if (Loc >= LastLoc) {
      // Mixed opcodes end the run.
      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
        // Offsets must be consecutive (previous offset + transfer size).
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
      LastOffset = Offset;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
      // Collect the run's instructions and transferred registers for the
      // safety / profitability check.
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
        for (unsigned i = 0; i != NumMove; ++i)
        // This is the new location for the loads / stores.
        // Loads cluster at the first op, stores at the last op; skip past
        // run members and debug values already at the insertion point.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        unsigned NewOpc = 0;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          // Form the pair instruction.
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
          // No LDRD/STRD formed: just splice each op to the insertion
          // point so the post-RA pass can merge them into LDM/STM.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            MBB->splice(InsertPos, MBB, Op);
          NumLdStMoved += NumMove;
// Per-block driver: number the instructions, bucket unpredicated i32/VFP
// loads and stores by base register (stopping at barriers or duplicate
// base+offset pairs), then hand each bucket to RescheduleOps.
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  // MI2LocMap records each instruction's position in program order.
  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  // Loads and stores are bucketed separately by base register; the *Bases
  // vectors keep deterministic iteration order over the buckets.
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  for (; MBBI != E; ++MBBI) {
    MachineInstr *MI = MBBI;
    const TargetInstrDesc &TID = MI->getDesc();
    if (TID.isCall() || TID.isTerminator()) {
      // Stop at barriers.
    // Assign a location number (debug values are transparent).
    if (!MI->isDebugValue())
      MI2LocMap[MI] = ++Loc;
    if (!isMemoryOp(MI))
    // Only unpredicated memory ops are rescheduled.
    unsigned PredReg = 0;
    if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)

    int Opc = MI->getOpcode();
    bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
    unsigned Base = MI->getOperand(1).getReg();
    int Offset = getMemoryOpOffset(MI);

    bool StopHere = false;
      DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
        Base2LdsMap.find(Base);
      if (BI != Base2LdsMap.end()) {
        // A second access to the same base+offset ends the region.
        for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
          if (Offset == getMemoryOpOffset(BI->second[i])) {
          BI->second.push_back(MI);
        // First load seen for this base: start a new bucket.
        SmallVector<MachineInstr*, 4> MIs;
        Base2LdsMap[Base] = MIs;
        LdBases.push_back(Base);
      DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
        Base2StsMap.find(Base);
      if (BI != Base2StsMap.end()) {
        for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
          if (Offset == getMemoryOpOffset(BI->second[i])) {
          BI->second.push_back(MI);
        // First store seen for this base: start a new bucket.
        SmallVector<MachineInstr*, 4> MIs;
        Base2StsMap[Base] = MIs;
        StBases.push_back(Base);

      // Found a duplicate (a base+offset combination that's seen earlier).

  // Re-schedule loads.
  for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
    unsigned Base = LdBases[i];
    SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);

  // Re-schedule stores.
  for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
    unsigned Base = StBases[i];
    SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);

  // Reset per-region state before scanning the next region of the block.
  Base2LdsMap.clear();
  Base2StsMap.clear();
1640 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1641 /// optimization pass.
1642 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1644 return new ARMPreAllocLoadStoreOpt();
1645 return new ARMLoadStoreOpt();