//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
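//
// For example (illustrative only), the post-RA pass can rewrite a run of
// loads off the same base register:
//
//   ldr r0, [r4]
//   ldr r1, [r4, #4]
//   ldr r2, [r4, #8]
//
// into a single load-multiple instruction:
//
//   ldmia r4, {r0, r1, r2}
//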
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &memOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::VLDRS:
    NumVLDMGened++;
    return ARM::VLDMS;
  case ARM::VSTRS:
    NumVSTMGened++;
    return ARM::VSTMS;
  case ARM::VLDRD:
    NumVLDMGened++;
    return ARM::VLDMD;
  case ARM::VSTRD:
    NumVSTMGened++;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}
/// MergeOps - Create and insert an LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
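///
/// E.g. (illustrative only) with Base = r4 and Regs = {r0, r1, r2} at
/// offset 0, the merged instruction would be:
///   ldmia r4, {r0, r1, r2}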
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If starting offset isn't zero, insert an MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MemOpQueue &memOps,
               unsigned memOpsBegin,
               unsigned memOpsEnd,
               unsigned insertAfter,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }

    Regs.push_back(std::make_pair(Reg, isKill));
  }
  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }

    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
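///
/// E.g. (illustrative only) given ascending register numbers and
/// consecutive offsets:
///   ldr r1, [r4]
///   ldr r2, [r4, #4]   =>   ldmia r4, {r1, r2}
/// An op that breaks the run (wrong register order, offset gap or
/// collision) ends the current merge and starts a new one.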
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);
  unsigned Count = 1;
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    // Can only do up to 16 double-word registers per insn.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default:
    return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
                Opcode == ARM::STM || Opcode == ARM::t2STM);

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  unsigned Offset = 0;

  if (isAM4) {
    // Can't use an updating ld/st if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }
    Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
  }
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isAM4) {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::db;
      } else if (isAM4 && Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::da;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        Mode = ARM_AM::db;
        DoMerge = true;
      }
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (isAM4) {
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill));
  if (isAM4) {
    // [t2]LDM_UPD, [t2]STM_UPD
    MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
      .addImm(Pred).addReg(PredReg);
  } else {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
      .addImm(Pred).addReg(PredReg);
  }
  // Transfer the rest of operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
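///
/// For example (illustrative only):
///   ldr r0, [r1]
///   add r1, r1, #4
/// =>
///   ldr r0, [r1], #4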
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }
  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
                               (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)
      return false;
  }

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
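///
/// Chains are broken by a different base, opcode or predicate, by an offset
/// collision, or by a load that clobbers its own base, e.g. (illustrative):
///   r4 := ldr [r5]       ; chain starts
///   r6 := ldr [r5, #4]   ; same base, next offset: added to the chain
///   r5 := ldr [r5, #8]   ; base clobbered: merge what we have, stop here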
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load / store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
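///
/// E.g. (illustrative only), when it is safe and profitable to do so:
///   ldr r0, [r4]              ldr r0, [r4]
///   add r7, r7, #1       =>   ldr r1, [r4, #4]
///   ldr r1, [r4, #4]          add r7, r7, #1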
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}