1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
// Pass-wide statistics counters (printed with -stats); each counts one
// category of load/store transformation performed by this pass.
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMAllocLoadStoreOpt - Post-register allocation pass that combines
55 /// load / store instructions to form ldm / stm instructions.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
// NOTE(review): this listing elides several member declarations that the
// method bodies below reference (e.g. the static pass ID, RS, isThumb2) —
// confirm against the full source.
60 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target interfaces, presumably initialized in runOnMachineFunction.
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
// MemOpQueueEntry - one queued load/store candidate for merging: its
// immediate offset, transfer register, kill flag, position in the block,
// and an iterator to the instruction itself.
75 struct MemOpQueueEntry {
80 MachineBasicBlock::iterator MBBI;
82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
83 MachineBasicBlock::iterator i)
84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
87 typedef MemOpQueue::iterator MemOpQueueIter;
// MergeOps - emit a single LDM/STM covering Regs; returns true on success.
89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
90 int Offset, unsigned Base, bool BaseKill, int Opcode,
91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
// MergeOpsUpdate - wrapper around MergeOps that also maintains the memop
// queue, kill flags, and the Merges list.
93 void MergeOpsUpdate(MachineBasicBlock &MBB,
102 ARMCC::CondCodes Pred,
106 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
// MergeLDR_STR - merge a run of queued loads/stores starting at SIndex.
107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
108 int Opcode, unsigned Size,
109 ARMCC::CondCodes Pred, unsigned PredReg,
110 unsigned Scratch, MemOpQueue &MemOps,
111 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator &MBBI);
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
117 MachineBasicBlock::iterator MBBI,
118 const TargetInstrInfo *TII,
120 MachineBasicBlock::iterator &I);
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
124 MachineBasicBlock::iterator &I);
125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification; the address of ID is what uniquely identifies the pass.
128 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single load/store opcode plus an
// addressing sub-mode (ia/ib/da/db) to the corresponding load/store-multiple
// opcode, or 0 where no such encoding exists.  (The outer switch-case labels
// selecting the opcode family are elided in this listing.)
131 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
133 default: llvm_unreachable("Unhandled opcode!");
137 default: llvm_unreachable("Unhandled submode!");
138 case ARM_AM::ia: return ARM::LDMIA;
139 case ARM_AM::da: return ARM::LDMDA;
140 case ARM_AM::db: return ARM::LDMDB;
141 case ARM_AM::ib: return ARM::LDMIB;
147 default: llvm_unreachable("Unhandled submode!");
148 case ARM_AM::ia: return ARM::STMIA;
149 case ARM_AM::da: return ARM::STMDA;
150 case ARM_AM::db: return ARM::STMDB;
151 case ARM_AM::ib: return ARM::STMIB;
// Thumb2 multiples only support the ia and db sub-modes.
158 default: llvm_unreachable("Unhandled submode!");
159 case ARM_AM::ia: return ARM::t2LDMIA;
160 case ARM_AM::db: return ARM::t2LDMDB;
167 default: llvm_unreachable("Unhandled submode!");
168 case ARM_AM::ia: return ARM::t2STMIA;
169 case ARM_AM::db: return ARM::t2STMDB;
// VFP multiples have no non-writeback db form, hence the 0 returns below.
175 default: llvm_unreachable("Unhandled submode!");
176 case ARM_AM::ia: return ARM::VLDMSIA;
177 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
183 default: llvm_unreachable("Unhandled submode!");
184 case ARM_AM::ia: return ARM::VSTMSIA;
185 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
191 default: llvm_unreachable("Unhandled submode!");
192 case ARM_AM::ia: return ARM::VLDMDIA;
193 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
199 default: llvm_unreachable("Unhandled submode!");
200 case ARM_AM::ia: return ARM::VSTMDIA;
201 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
// getLoadStoreMultipleSubMode - Return the addressing sub-mode encoded in a
// load/store-multiple opcode; bad_am_submode for unknown opcodes.  (Many
// case labels and the per-group return statements are elided in this
// listing; the visible labels group by _UPD variants.)
212 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
214 default: llvm_unreachable("Unhandled opcode!");
220 case ARM::t2LDMIA_RET:
222 case ARM::t2LDMIA_UPD:
224 case ARM::t2STMIA_UPD:
226 case ARM::VLDMSIA_UPD:
228 case ARM::VSTMSIA_UPD:
230 case ARM::VLDMDIA_UPD:
232 case ARM::VSTMDIA_UPD:
246 case ARM::t2LDMDB_UPD:
248 case ARM::t2STMDB_UPD:
249 case ARM::VLDMSDB_UPD:
250 case ARM::VSTMSDB_UPD:
251 case ARM::VLDMDDB_UPD:
252 case ARM::VSTMDDB_UPD:
262 return ARM_AM::bad_am_submode;
265 } // end namespace ARM_AM
266 } // end namespace llvm
// isT2i32Load - True if Opc is a Thumb2 32-bit integer load (i12 or i8 form).
268 static bool isT2i32Load(unsigned Opc) {
269 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// isi32Load - True if Opc is any 32-bit integer load (ARM LDRi12 or Thumb2).
272 static bool isi32Load(unsigned Opc) {
273 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
// isT2i32Store - True if Opc is a Thumb2 32-bit integer store (i12 or i8 form).
276 static bool isT2i32Store(unsigned Opc) {
277 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// isi32Store - True if Opc is any 32-bit integer store (ARM STRi12 or Thumb2).
280 static bool isi32Store(unsigned Opc) {
281 return Opc == ARM::STRi12 || isT2i32Store(Opc);
284 /// MergeOps - Create and insert a LDM or STM with Base as base register and
285 /// registers in Regs as the register operands that would be loaded / stored.
286 /// It returns true if the transformation is done.
288 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
289 MachineBasicBlock::iterator MBBI,
290 int Offset, unsigned Base, bool BaseKill,
291 int Opcode, ARMCC::CondCodes Pred,
292 unsigned PredReg, unsigned Scratch, DebugLoc dl,
293 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
294 // Only a single register to load / store. Don't bother.
295 unsigned NumRegs = Regs.size();
// Select the addressing sub-mode from the starting offset: ia covers
// offset 0, ib/da only exist for non-VFP ARM-mode, db for -4*NumRegs.
299 ARM_AM::AMSubMode Mode = ARM_AM::ia;
300 // VFP and Thumb2 do not support IB or DA modes.
301 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
302 bool haveIBAndDA = isNotVFP && !isThumb2;
303 if (Offset == 4 && haveIBAndDA)
305 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
307 else if (Offset == -4 * (int)NumRegs && isNotVFP)
308 // VLDM/VSTM do not support DB mode without also updating the base reg.
310 else if (Offset != 0) {
311 // Check if this is a supported opcode before we insert instructions to
312 // calculate a new base register.
313 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
315 // If starting offset isn't zero, insert a MI to materialize a new base.
316 // But only do so if it is cost effective, i.e. merging more than two
322 if (isi32Load(Opcode))
323 // If it is a load, then just use one of the destination register to
324 // use as the new base.
325 NewBase = Regs[NumRegs-1].first;
327 // Use the scratch register to use as a new base.
// Pick add vs. sub of the SP-relative or plain immediate form; the sub
// alternative (elided here) mirrors the add one.
332 int BaseOpc = !isThumb2
334 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
338 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
// The offset must be encodable as a (T2) shifter-operand immediate.
341 int ImmedOffset = isThumb2
342 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
343 if (ImmedOffset == -1)
344 // FIXME: Try t2ADDri12 or t2SUBri12?
345 return false; // Probably not worth it then.
347 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
348 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
349 .addImm(Pred).addReg(PredReg).addReg(0);
351 BaseKill = true; // New base is always killed right after its use.
// Loads define their register operands; stores use (possibly kill) them.
354 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
355 Opcode == ARM::VLDRD);
356 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
357 if (!Opcode) return false;
358 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
359 .addReg(Base, getKillRegState(BaseKill))
360 .addImm(Pred).addReg(PredReg);
361 for (unsigned i = 0; i != NumRegs; ++i)
362 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
363 | getKillRegState(Regs[i].second));
368 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success: erase the merged instructions, repair kill flags, and record the
// new multiple instruction in Merges.
370 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
372 unsigned memOpsBegin, unsigned memOpsEnd,
373 unsigned insertAfter, int Offset,
374 unsigned Base, bool BaseKill,
376 ARMCC::CondCodes Pred, unsigned PredReg,
379 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
380 // First calculate which of the registers should be killed by the merged
// instruction: registers killed by memops that sit before the insertion
// point must have their kill transferred to the merged instruction.
382 const unsigned insertPos = memOps[insertAfter].Position;
383 SmallSet<unsigned, 4> KilledRegs;
384 DenseMap<unsigned, unsigned> Killer;
385 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
386 if (i == memOpsBegin) {
391 if (memOps[i].Position < insertPos && memOps[i].isKill) {
392 unsigned Reg = memOps[i].Reg;
393 KilledRegs.insert(Reg);
// Build the (register, isKill) list handed to MergeOps.
398 SmallVector<std::pair<unsigned, bool>, 8> Regs;
399 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
400 unsigned Reg = memOps[i].Reg;
401 // If we are inserting the merged operation after an operation that
402 // uses the same register, make sure to transfer any kill flag.
403 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
404 Regs.push_back(std::make_pair(Reg, isKill));
407 // Try to do the merge.
408 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
410 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
411 Pred, PredReg, Scratch, dl, Regs))
414 // Merge succeeded, update records.
415 Merges.push_back(prior(Loc));
416 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
417 // Remove kill flags from any memops that come before insertPos.
418 if (Regs[i-memOpsBegin].second) {
419 unsigned Reg = Regs[i-memOpsBegin].first;
420 if (KilledRegs.count(Reg)) {
421 unsigned j = Killer[Reg];
422 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
423 assert(Idx >= 0 && "Cannot find killing operand");
424 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
425 memOps[j].isKill = false;
427 memOps[i].isKill = true;
// Erase the original single load/store now covered by the merge.
429 MBB.erase(memOps[i].MBBI);
430 // Update this memop to refer to the merged instruction.
431 // We may need to move kill flags again.
432 memOps[i].Merged = true;
433 memOps[i].MBBI = Merges.back();
434 memOps[i].Position = insertPos;
438 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
439 /// load / store multiple instructions.
441 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
442 unsigned Base, int Opcode, unsigned Size,
443 ARMCC::CondCodes Pred, unsigned PredReg,
444 unsigned Scratch, MemOpQueue &MemOps,
445 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
446 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
447 int Offset = MemOps[SIndex].Offset;
448 int SOffset = Offset;
449 unsigned insertAfter = SIndex;
450 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
451 DebugLoc dl = Loc->getDebugLoc();
// Operand 0 of a single load/store is its transfer register.
452 const MachineOperand &PMO = Loc->getOperand(0);
453 unsigned PReg = PMO.getReg();
454 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
455 : getARMRegisterNumbering(PReg);
457 unsigned Limit = ~0U;
459 // vldm / vstm limit are 32 for S variants, 16 for D variants.
// Greedily extend the run while offsets increase by Size and register
// numbers satisfy the ordering constraint below.
477 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
478 int NewOffset = MemOps[i].Offset;
479 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
480 unsigned Reg = MO.getReg();
481 unsigned RegNum = MO.isUndef() ? UINT_MAX
482 : getARMRegisterNumbering(Reg);
483 // Register numbers must be in ascending order. For VFP / NEON load and
484 // store multiples, the registers must also be consecutive and within the
485 // limit on the number of registers per instruction.
486 if (Reg != ARM::SP &&
487 NewOffset == Offset + (int)Size &&
488 ((isNotVFP && RegNum > PRegNum) ||
489 ((Count < Limit) && RegNum == PRegNum+1))) {
494 // Can't merge this in. Try merge the earlier ones first.
495 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
496 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Recurse to try merging the remaining tail of the queue.
497 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
502 if (MemOps[i].Position > MemOps[insertAfter].Position)
// Final merge of everything accumulated from SIndex to the end.
506 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
507 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
508 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// isMatchingDecrement - True if MI is a SUB of Bytes from Base into Base,
// under the same predicate; Limit (when non-zero) bounds the allowed
// immediate (e.g. 8-bit encodable range).
512 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
513 unsigned Bytes, unsigned Limit,
514 ARMCC::CondCodes Pred, unsigned PredReg){
515 unsigned MyPredReg = 0;
518 if (MI->getOpcode() != ARM::t2SUBri &&
519 MI->getOpcode() != ARM::t2SUBrSPi &&
520 MI->getOpcode() != ARM::t2SUBrSPi12 &&
521 MI->getOpcode() != ARM::tSUBspi &&
522 MI->getOpcode() != ARM::SUBri)
525 // Make sure the offset fits in 8 bits.
526 if (Bytes == 0 || (Limit && Bytes >= Limit))
// tSUBspi encodes its immediate scaled by 4.
529 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
530 return (MI->getOperand(0).getReg() == Base &&
531 MI->getOperand(1).getReg() == Base &&
532 (MI->getOperand(2).getImm()*Scale) == Bytes &&
533 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
534 MyPredReg == PredReg);
// isMatchingIncrement - Mirror of isMatchingDecrement for ADD: true if MI
// adds exactly Bytes to Base (into Base) under the same predicate.
537 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
538 unsigned Bytes, unsigned Limit,
539 ARMCC::CondCodes Pred, unsigned PredReg){
540 unsigned MyPredReg = 0;
543 if (MI->getOpcode() != ARM::t2ADDri &&
544 MI->getOpcode() != ARM::t2ADDrSPi &&
545 MI->getOpcode() != ARM::t2ADDrSPi12 &&
546 MI->getOpcode() != ARM::tADDspi &&
547 MI->getOpcode() != ARM::ADDri)
550 if (Bytes == 0 || (Limit && Bytes >= Limit))
551 // Make sure the offset fits in 8 bits.
// tADDspi encodes its immediate scaled by 4.
554 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
555 return (MI->getOperand(0).getReg() == Base &&
556 MI->getOperand(1).getReg() == Base &&
557 (MI->getOperand(2).getImm()*Scale) == Bytes &&
558 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
559 MyPredReg == PredReg);
// getLSMultipleTransferSize - Number of bytes transferred by MI.  For the
// multiple forms this is registers-transferred * 4 (or * 8 for the D-register
// VFP variants); the per-opcode case labels are elided in this listing.
562 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
563 switch (MI->getOpcode()) {
591 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
594 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
// getUpdatingLSMultipleOpcode - Map a non-writeback load/store-multiple
// opcode plus a sub-mode to the corresponding base-updating (_UPD) opcode.
// (The outer case labels selecting the opcode family are elided.)
598 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
599 ARM_AM::AMSubMode Mode) {
601 default: llvm_unreachable("Unhandled opcode!");
607 default: llvm_unreachable("Unhandled submode!");
608 case ARM_AM::ia: return ARM::LDMIA_UPD;
609 case ARM_AM::ib: return ARM::LDMIB_UPD;
610 case ARM_AM::da: return ARM::LDMDA_UPD;
611 case ARM_AM::db: return ARM::LDMDB_UPD;
619 default: llvm_unreachable("Unhandled submode!");
620 case ARM_AM::ia: return ARM::STMIA_UPD;
621 case ARM_AM::ib: return ARM::STMIB_UPD;
622 case ARM_AM::da: return ARM::STMDA_UPD;
623 case ARM_AM::db: return ARM::STMDB_UPD;
// Thumb2 and VFP updating multiples only support ia and db.
629 default: llvm_unreachable("Unhandled submode!");
630 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
631 case ARM_AM::db: return ARM::t2LDMDB_UPD;
637 default: llvm_unreachable("Unhandled submode!");
638 case ARM_AM::ia: return ARM::t2STMIA_UPD;
639 case ARM_AM::db: return ARM::t2STMDB_UPD;
644 default: llvm_unreachable("Unhandled submode!");
645 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
646 case ARM_AM::db: return ARM::VLDMSDB_UPD;
651 default: llvm_unreachable("Unhandled submode!");
652 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
653 case ARM_AM::db: return ARM::VLDMDDB_UPD;
658 default: llvm_unreachable("Unhandled submode!");
659 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
660 case ARM_AM::db: return ARM::VSTMSDB_UPD;
665 default: llvm_unreachable("Unhandled submode!");
666 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
667 case ARM_AM::db: return ARM::VSTMDDB_UPD;
675 /// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
676 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
678 /// stmia rn, <ra, rb, rc>
679 /// rn := rn + 4 * 3;
681 /// stmia rn!, <ra, rb, rc>
683 /// rn := rn - 4 * 3;
684 /// ldmia rn, <ra, rb, rc>
686 /// ldmdb rn!, <ra, rb, rc>
687 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
688 MachineBasicBlock::iterator MBBI,
690 MachineBasicBlock::iterator &I) {
691 MachineInstr *MI = MBBI;
// Operand 0 of a (non-updating) load/store multiple is the base register.
692 unsigned Base = MI->getOperand(0).getReg();
693 bool BaseKill = MI->getOperand(0).isKill();
694 unsigned Bytes = getLSMultipleTransferSize(MI);
695 unsigned PredReg = 0;
696 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
697 int Opcode = MI->getOpcode();
698 DebugLoc dl = MI->getDebugLoc();
700 // Can't use an updating ld/st if the base register is also a dest
701 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
702 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
703 if (MI->getOperand(i).getReg() == Base)
706 bool DoMerge = false;
707 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
709 // Try merging with the previous instruction.
710 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
711 if (MBBI != BeginMBBI) {
712 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over debug values so they don't block the match.
713 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
715 if (Mode == ARM_AM::ia &&
716 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
719 } else if (Mode == ARM_AM::ib &&
720 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
728 // Try merging with the next instruction.
729 MachineBasicBlock::iterator EndMBBI = MBB.end();
730 if (!DoMerge && MBBI != EndMBBI) {
731 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
732 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
734 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
735 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
737 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
738 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Rebuild the instruction as the _UPD form with a writeback def of Base.
753 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
754 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
755 .addReg(Base, getDefRegState(true)) // WB base register
756 .addReg(Base, getKillRegState(BaseKill))
757 .addImm(Pred).addReg(PredReg);
759 // Transfer the rest of operands.
760 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
761 MIB.addOperand(MI->getOperand(OpNum));
763 // Transfer memoperands.
764 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// getPreIndexedLoadStoreOpcode - Map a single load/store opcode to its
// pre-indexed (base-updating before access) counterpart.  VFP loads/stores
// have no pre-indexed form, so they map to the one-register _UPD multiples.
// (The case labels are elided in this listing.)
770 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
771 ARM_AM::AddrOpc Mode) {
778 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
780 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
782 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
784 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
787 return ARM::t2LDR_PRE;
790 return ARM::t2STR_PRE;
791 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a single load/store opcode to its
// post-indexed (base-updating after access) counterpart; VFP variants again
// fall back to the one-register _UPD multiples.  (Case labels elided.)
796 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
797 ARM_AM::AddrOpc Mode) {
800 return ARM::LDR_POST;
802 return ARM::STR_POST;
804 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
806 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
808 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
810 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
813 return ARM::t2LDR_POST;
816 return ARM::t2STR_POST;
817 default: llvm_unreachable("Unhandled opcode!");
822 /// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
823 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
824 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
825 MachineBasicBlock::iterator MBBI,
826 const TargetInstrInfo *TII,
828 MachineBasicBlock::iterator &I) {
829 MachineInstr *MI = MBBI;
// Operand 1 of a single load/store is the base register.
830 unsigned Base = MI->getOperand(1).getReg();
831 bool BaseKill = MI->getOperand(1).isKill();
832 unsigned Bytes = getLSMultipleTransferSize(MI);
833 int Opcode = MI->getOpcode();
834 DebugLoc dl = MI->getDebugLoc();
835 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
836 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
837 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset accesses can absorb the base update.
838 if (isi32Load(Opcode) || isi32Store(Opcode))
839 if (MI->getOperand(2).getImm() != 0)
841 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
844 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
845 // Can't do the merge if the destination register is the same as the would-be
846 // writeback register.
847 if (isLd && MI->getOperand(0).getReg() == Base)
850 unsigned PredReg = 0;
851 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
852 bool DoMerge = false;
853 ARM_AM::AddrOpc AddSub = ARM_AM::add;
855 // AM2 - 12 bits, thumb2 - 8 bits.
856 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
858 // Try merging with the previous instruction.
859 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
860 if (MBBI != BeginMBBI) {
861 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
862 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
864 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
866 AddSub = ARM_AM::sub;
868 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A preceding add/sub folds into a pre-indexed access.
872 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
877 // Try merging with the next instruction.
878 MachineBasicBlock::iterator EndMBBI = MBB.end();
879 if (!DoMerge && MBBI != EndMBBI) {
880 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
881 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
884 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
886 AddSub = ARM_AM::sub;
887 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A trailing add/sub folds into a post-indexed access.
891 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
// Encode the folded offset: AM2 packs add/sub + shift, Thumb2 is signed.
905 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
907 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
910 // VLDM[SD}_UPD, VSTM[SD]_UPD
911 // (There are no base-updating versions of VLDR/VSTR instructions, but the
912 // updating load/store-multiple instructions can be used with only one
914 MachineOperand &MO = MI->getOperand(0);
915 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
916 .addReg(Base, getDefRegState(true)) // WB base register
917 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
918 .addImm(Pred).addReg(PredReg)
919 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
920 getKillRegState(MO.isKill())));
923 // LDR_PRE, LDR_POST,
924 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
925 .addReg(Base, RegState::Define)
926 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
928 // t2LDR_PRE, t2LDR_POST
929 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
930 .addReg(Base, RegState::Define)
931 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
933 MachineOperand &MO = MI->getOperand(0);
// STR_PRE, STR_POST (ARM-mode store forms with the register-offset slot).
936 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
937 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
938 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
940 // t2STR_PRE, t2STR_POST
941 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
942 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
943 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
950 /// isMemoryOp - Returns true if instruction is a memory operations (that this
951 /// pass is capable of operating on).
952 static bool isMemoryOp(const MachineInstr *MI) {
953 // When no memory operands are present, conservatively assume unaligned,
954 // volatile, unfoldable.
955 if (!MI->hasOneMemOperand())
958 const MachineMemOperand *MMO = *MI->memoperands_begin();
960 // Don't touch volatile memory accesses - we may be changing their order.
961 if (MMO->isVolatile())
964 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
966 if (MMO->getAlignment() < 4)
969 // str <undef> could probably be eliminated entirely, but for now we just want
970 // to avoid making a mess of it.
971 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
972 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
973 MI->getOperand(0).isUndef())
976 // Likewise don't mess with references to undefined addresses.
977 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
978 MI->getOperand(1).isUndef())
// Finally, only specific opcodes with a plain register base qualify
// (the opcode case labels are elided in this listing).
981 int Opcode = MI->getOpcode();
986 return MI->getOperand(1).isReg();
989 return MI->getOperand(1).isReg();
996 return MI->getOperand(1).isReg();
1001 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1002 /// op that is being merged.
1003 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
// Find the memop with the smallest Position (earliest in the block).
1004 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1005 unsigned Position = MemOps[0].Position;
1006 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1007 if (MemOps[i].Position < Position) {
1008 Position = MemOps[i].Position;
1009 Loc = MemOps[i].MBBI;
1013 if (Loc != MBB.begin())
1014 RS->forward(prior(Loc));
// getMemoryOpOffset - Extract the signed byte offset of a load/store.
// i12/i8 forms store the offset directly; AM3 (LDRD/STRD) and AM5 (VFP)
// forms pack an add/sub flag with the magnitude (AM5 scales by 4).
1017 static int getMemoryOpOffset(const MachineInstr *MI) {
1018 int Opcode = MI->getOpcode();
1019 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1020 unsigned NumOperands = MI->getDesc().getNumOperands();
1021 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1023 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1024 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1025 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1026 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1029 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1030 : ARM_AM::getAM5Offset(OffField) * 4;
1032 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1035 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit one single load (isDef true) or store before MBBI,
// carrying the given register/base/offset flags and predicate.  Used by
// FixInvalidRegPairOp when splitting an LDRD/STRD into two accesses.
1041 static void InsertLDR_STR(MachineBasicBlock &MBB,
1042 MachineBasicBlock::iterator &MBBI,
1043 int Offset, bool isDef,
1044 DebugLoc dl, unsigned NewOpc,
1045 unsigned Reg, bool RegDeadKill, bool RegUndef,
1046 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1047 bool OffKill, bool OffUndef,
1048 ARMCC::CondCodes Pred, unsigned PredReg,
1049 const TargetInstrInfo *TII, bool isT2) {
// Load: Reg is a def (possibly dead).
1051 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1053 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1054 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1055 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store: Reg is a use (possibly killed/undef).
1057 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1059 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1060 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1061 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair is not a
// valid (even, even+1) pair: into an LDM/STM when the offset is zero and the
// registers ascend, otherwise into two single loads/stores.
1065 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1066 MachineBasicBlock::iterator &MBBI) {
1067 MachineInstr *MI = &*MBBI;
1068 unsigned Opcode = MI->getOpcode();
1069 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1070 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1071 unsigned EvenReg = MI->getOperand(0).getReg();
1072 unsigned OddReg = MI->getOperand(1).getReg();
1073 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1074 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// A legal pair (even register, immediately followed by odd) needs no fix.
1075 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
1078 MachineBasicBlock::iterator NewBBI = MBBI;
1079 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1080 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1081 bool EvenDeadKill = isLd ?
1082 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1083 bool EvenUndef = MI->getOperand(0).isUndef();
1084 bool OddDeadKill = isLd ?
1085 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1086 bool OddUndef = MI->getOperand(1).isUndef();
1087 const MachineOperand &BaseOp = MI->getOperand(2);
1088 unsigned BaseReg = BaseOp.getReg();
1089 bool BaseKill = BaseOp.isKill();
1090 bool BaseUndef = BaseOp.isUndef();
1091 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1092 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1093 int OffImm = getMemoryOpOffset(MI);
1094 unsigned PredReg = 0;
1095 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1097 if (OddRegNum > EvenRegNum && OffImm == 0) {
1098 // Ascending register numbers and no offset. It's safe to change it to a
1100 unsigned NewOpc = (isLd)
1101 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1102 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
// Load case: both registers become defs.
1104 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1105 .addReg(BaseReg, getKillRegState(BaseKill))
1106 .addImm(Pred).addReg(PredReg)
1107 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1108 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store case: both registers are uses.
1111 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1112 .addReg(BaseReg, getKillRegState(BaseKill))
1113 .addImm(Pred).addReg(PredReg)
1115 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1117 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1120 NewBBI = llvm::prior(MBBI);
1122 // Split into two instructions.
1123 unsigned NewOpc = (isLd)
1124 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1125 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1126 DebugLoc dl = MBBI->getDebugLoc();
1127 // If this is a load and base register is killed, it may have been
1128 // re-defed by the load, make sure the first load does not clobber it.
1130 (BaseKill || OffKill) &&
1131 (TRI->regsOverlap(EvenReg, BaseReg))) {
1132 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Load the odd (higher-addressed) half first so the base survives.
1133 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1134 OddReg, OddDeadKill, false,
1135 BaseReg, false, BaseUndef, false, OffUndef,
1136 Pred, PredReg, TII, isT2);
1137 NewBBI = llvm::prior(MBBI);
1138 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1139 EvenReg, EvenDeadKill, false,
1140 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1141 Pred, PredReg, TII, isT2);
1143 if (OddReg == EvenReg && EvenDeadKill) {
1144 // If the two source operands are the same, the kill marker is
1145 // probably on the first one. e.g.
1146 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1147 EvenDeadKill = false;
// Normal order: even (lower-addressed) half first, then odd.
1150 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1151 EvenReg, EvenDeadKill, EvenUndef,
1152 BaseReg, false, BaseUndef, false, OffUndef,
1153 Pred, PredReg, TII, isT2);
1154 NewBBI = llvm::prior(MBBI);
1155 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1156 OddReg, OddDeadKill, OddUndef,
1157 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1158 Pred, PredReg, TII, isT2);
1173 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1174 /// ops of the same base and incrementing offset into LDM / STM ops.
1175 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1176 unsigned NumMerges = 0;
1177 unsigned NumMemOps = 0;
// State for the chain of mergeable memory ops currently being collected:
// common base register, transfer size, predicate, and block position.
1179 unsigned CurrBase = 0;
1181 unsigned CurrSize = 0;
1182 ARMCC::CondCodes CurrPred = ARMCC::AL;
1183 unsigned CurrPredReg = 0;
1184 unsigned Position = 0;
// The LDM/STM instructions produced by merging a chain.
1185 SmallVector<MachineBasicBlock::iterator,4> Merges;
// The register scavenger tracks liveness so a scratch base register can
// be found later if the merge needs one.
1187 RS->enterBasicBlock(&MBB);
1188 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// Fix invalid LDRD/STRD register pairs before considering any merge.
1190 if (FixInvalidRegPairOp(MBB, MBBI))
1193 bool Advance = false;
1194 bool TryMerge = false;
1195 bool Clobber = false;
1197 bool isMemOp = isMemoryOp(MBBI);
// Decode the candidate load/store: opcode, transfer size, transfer
// register (and its kill state), base register, predicate, and offset.
1199 int Opcode = MBBI->getOpcode();
1200 unsigned Size = getLSMultipleTransferSize(MBBI);
1201 const MachineOperand &MO = MBBI->getOperand(0);
1202 unsigned Reg = MO.getReg();
1203 bool isKill = MO.isDef() ? false : MO.isKill();
1204 unsigned Base = MBBI->getOperand(1).getReg();
1205 unsigned PredReg = 0;
1206 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1207 int Offset = getMemoryOpOffset(MBBI);
// Watch for a load that redefines its own base register, e.g.:
1210 // r5 := ldr [r5, #4]
1211 // r6 := ldr [r5, #8]
1213 // The second ldr has effectively broken the chain even though it
1214 // looks like the later ldr(s) use the same base register. Try to
1215 // merge the ldr's so far, including this one. But don't try to
1216 // combine the following ldr(s).
1217 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1218 if (CurrBase == 0 && !Clobber) {
1219 // Start of a new chain.
1224 CurrPredReg = PredReg;
1225 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
// Same opcode, base, and predicate as the chain so far: keep queueing.
1234 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1235 // No need to match PredReg.
1236 // Continue adding to the queue.
1237 if (Offset > MemOps.back().Offset) {
1238 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
// Otherwise find the sorted insertion point by offset.
1243 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1245 if (Offset < I->Offset) {
1246 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1251 } else if (Offset == I->Offset) {
1252 // Collision! This can't be merged!
// Debug values are transparent: they neither extend nor break a chain.
1261 if (MBBI->isDebugValue()) {
1264 // Reach the end of the block, try merging the memory instructions.
1266 } else if (Advance) {
1270 // Reach the end of the block, try merging the memory instructions.
1276 if (NumMemOps > 1) {
1277 // Try to find a free register to use as a new base in case it's needed.
1278 // First advance to the instruction just before the start of the chain.
1279 AdvanceRS(MBB, MemOps);
1280 // Find a scratch register.
1281 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1282 // Process the load / store instructions.
1283 RS->forward(prior(MBBI));
// Merge the queued ops into LDM/STM instructions (results in Merges).
1287 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1288 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1290 // Try folding preceding/trailing base inc/dec into the generated
1292 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1293 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1295 NumMerges += Merges.size();
1297 // Try folding preceding/trailing base inc/dec into those load/store
1298 // that were not merged to form LDM/STM ops.
1299 for (unsigned i = 0; i != NumMemOps; ++i)
1300 if (!MemOps[i].Merged)
1301 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1304 // RS may be pointing to an instruction that's deleted.
1305 RS->skipTo(prior(MBBI));
1306 } else if (NumMemOps == 1) {
1307 // Try folding preceding/trailing base inc/dec into the single
1309 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1311 RS->forward(prior(MBBI));
// Reset chain state so the scan can start a fresh chain.
1318 CurrPred = ARMCC::AL;
1325 // If iterator hasn't been advanced and this is not a memory op, skip it.
1326 // It can't start a new chain anyway.
1327 if (!Advance && !isMemOp && MBBI != E) {
// Report whether any LDM/STM was formed.
1333 return NumMerges > 0;
1336 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1337 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1338 /// directly restore the value of LR into pc.
1339 /// ldmfd sp!, {..., lr}
1342 /// ldmfd sp!, {..., lr}
1345 /// ldmfd sp!, {..., pc}
1346 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1347 if (MBB.empty()) return false;
// Only the supported return forms at the end of the block are candidates.
1349 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1350 if (MBBI != MBB.begin() &&
1351 (MBBI->getOpcode() == ARM::BX_RET ||
1352 MBBI->getOpcode() == ARM::tBX_RET ||
1353 MBBI->getOpcode() == ARM::MOVPCLR)) {
// The instruction just before the return must be a writeback LDM whose
// last operand pops LR.
1354 MachineInstr *PrevMI = prior(MBBI);
1355 unsigned Opcode = PrevMI->getOpcode();
1356 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1357 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1358 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1359 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1360 if (MO.getReg() != ARM::LR)
// Rewrite the LDM into the return-forming (pop-into-PC) variant. Only
// the IA form is expected here, as the assert documents.
1362 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1363 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1364 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1365 PrevMI->setDesc(TII->get(NewOpc));
// Carry over the return's implicit operands onto the rewritten LDM.
1367 PrevMI->copyImplicitOps(&*MBBI);
// Entry point for the post-RA pass: caches target hooks, then runs the
// LDM/STM formation (and, on v5T+, return merging) on every block.
1375 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1376 const TargetMachine &TM = Fn.getTarget();
1377 AFI = Fn.getInfo<ARMFunctionInfo>();
1378 TII = TM.getInstrInfo();
1379 TRI = TM.getRegisterInfo();
1380 RS = new RegScavenger();
1381 isThumb2 = AFI->isThumb2Function();
1383 bool Modified = false;
1384 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1386 MachineBasicBlock &MBB = *MFI;
1387 Modified |= LoadStoreMultipleOpti(MBB);
// Merging the return into an LDM (pop into PC) requires v5T ops.
1388 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1389 Modified |= MergeReturnIntoLDM(MBB);
1397 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1398 /// load / stores from consecutive locations close to make it more
1399 /// likely they will be combined later.
1402 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1404 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target interfaces; initialized in runOnMachineFunction.
1406 const TargetData *TD;
1407 const TargetInstrInfo *TII;
1408 const TargetRegisterInfo *TRI;
1409 const ARMSubtarget *STI;
1410 MachineRegisterInfo *MRI;
1411 MachineFunction *MF;
1413 virtual bool runOnMachineFunction(MachineFunction &Fn);
1415 virtual const char *getPassName() const {
1416 return "ARM pre- register allocation load / store optimization pass";
// CanFormLdStDWord - Check whether Op0/Op1 can be combined into one
// LDRD/STRD; on success fills in the new opcode, registers, and offset.
1420 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1421 unsigned &NewOpc, unsigned &EvenReg,
1422 unsigned &OddReg, unsigned &BaseReg,
1424 unsigned &PredReg, ARMCC::CondCodes &Pred,
// RescheduleOps - Move same-base loads/stores adjacent to each other so
// a later pass can combine them.
1426 bool RescheduleOps(MachineBasicBlock *MBB,
1427 SmallVector<MachineInstr*, 4> &Ops,
1428 unsigned Base, bool isLd,
1429 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1430 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1432 char ARMPreAllocLoadStoreOpt::ID = 0;
// Entry point for the pre-RA pass: caches target data/instruction/register
// info, then reschedules loads/stores in every basic block.
1435 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1436 TD = Fn.getTarget().getTargetData();
1437 TII = Fn.getTarget().getInstrInfo();
1438 TRI = Fn.getTarget().getRegisterInfo();
1439 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1440 MRI = &Fn.getRegInfo();
1443 bool Modified = false;
1444 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1446 Modified |= RescheduleLoadStoreInstrs(MFI);
// Decide whether the memory ops in MemOps (transferring the registers in
// MemRegs, all using Base) can be moved together across the instructions
// in [I, E) without changing behavior or blowing up register pressure.
1451 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1452 MachineBasicBlock::iterator I,
1453 MachineBasicBlock::iterator E,
1454 SmallPtrSet<MachineInstr*, 4> &MemOps,
1455 SmallSet<unsigned, 4> &MemRegs,
1456 const TargetRegisterInfo *TRI) {
1457 // Are there stores / loads / calls between them?
1458 // FIXME: This is overly conservative. We should make use of alias information
1460 SmallSet<unsigned, 4> AddedRegPressure;
// Skip the memory ops being moved themselves and debug values.
1462 if (I->isDebugValue() || MemOps.count(&*I))
1464 const TargetInstrDesc &TID = I->getDesc();
// Calls, terminators, and side-effecting instructions block any motion.
1465 if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
// A load may not be moved across an intervening store.
1467 if (isLd && TID.mayStore())
1472 // It's not safe to move the first 'str' down.
1475 // str r4, [r0, #+4]
// Scan operands: a def that overlaps the base register makes the move
// unsafe; other registers touched here raise the pressure estimate.
1479 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1480 MachineOperand &MO = I->getOperand(j);
1483 unsigned Reg = MO.getReg();
1484 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1486 if (Reg != Base && !MemRegs.count(Reg))
1487 AddedRegPressure.insert(Reg);
1491 // Estimate register pressure increase due to the transformation.
1492 if (MemRegs.size() <= 4)
1493 // Ok if we are moving small number of instructions.
1495 return AddedRegPressure.size() <= MemRegs.size() * 2;
// Returns true if Op0/Op1 can be merged into a single LDRD/STRD, filling
// in the output parameters (new opcode, even/odd/base registers, encoded
// offset, predicate, debug location) for the caller to build it.
1499 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1501 unsigned &NewOpc, unsigned &EvenReg,
1502 unsigned &OddReg, unsigned &BaseReg,
1503 int &Offset, unsigned &PredReg,
1504 ARMCC::CondCodes &Pred,
1506 // Make sure we're allowed to generate LDRD/STRD.
1507 if (!STI->hasV5TEOps())
1510 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the 32-bit load/store opcode of Op0 to its doubleword counterpart.
1512 unsigned Opcode = Op0->getOpcode();
1513 if (Opcode == ARM::LDRi12)
1515 else if (Opcode == ARM::STRi12)
1517 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1518 NewOpc = ARM::t2LDRDi8;
1521 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1522 NewOpc = ARM::t2STRDi8;
1528 // Make sure the base address satisfies i64 ld / st alignment requirement.
// Need exactly one known, non-volatile memory operand to reason about
// the access's alignment safely.
1529 if (!Op0->hasOneMemOperand() ||
1530 !(*Op0->memoperands_begin())->getValue() ||
1531 (*Op0->memoperands_begin())->isVolatile())
1534 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1535 const Function *Func = MF->getFunction();
1536 unsigned ReqAlign = STI->hasV6Ops()
1537 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1538 : 8; // Pre-v6 need 8-byte align
1539 if (Align < ReqAlign)
1542 // Then make sure the immediate offset fits.
1543 int OffImm = getMemoryOpOffset(Op0);
1545 int Limit = (1 << 8) * Scale;
1546 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// Negative offsets are expressed via an explicit subtract in addrmode3.
1550 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1552 AddSub = ARM_AM::sub;
1555 int Limit = (1 << 8) * Scale;
1556 if (OffImm >= Limit || (OffImm & (Scale-1)))
1558 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// LDRD/STRD needs two distinct transfer registers.
1560 EvenReg = Op0->getOperand(0).getReg();
1561 OddReg = Op1->getOperand(0).getReg();
1562 if (EvenReg == OddReg)
1564 BaseReg = Op0->getOperand(1).getReg();
1565 Pred = llvm::getInstrPredicate(Op0, PredReg);
1566 dl = Op0->getDebugLoc();
// Comparator for sorting memory ops by decreasing memory offset; two
// distinct instructions must never compare equal (asserted below).
1571 struct OffsetCompare {
1572 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1573 int LOffset = getMemoryOpOffset(LHS);
1574 int ROffset = getMemoryOpOffset(RHS);
1575 assert(LHS == RHS || LOffset != ROffset);
1576 return LOffset > ROffset;
// Move the same-base loads/stores in Ops next to each other (forming an
// LDRD/STRD directly when exactly two qualify) so later passes can merge
// them. MI2LocMap gives each instruction's position within the block.
1581 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1582 SmallVector<MachineInstr*, 4> &Ops,
1583 unsigned Base, bool isLd,
1584 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1585 bool RetVal = false;
1587 // Sort by offset (in reverse order).
1588 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1590 // The loads / stores of the same base are in order. Scan them from first to
1591 // last and check for the following:
1592 // 1. Any def of base.
1594 while (Ops.size() > 1) {
// Find a run of ops with consecutive offsets and the same opcode/size;
// track first/last block positions to bound the distance moved.
1595 unsigned FirstLoc = ~0U;
1596 unsigned LastLoc = 0;
1597 MachineInstr *FirstOp = 0;
1598 MachineInstr *LastOp = 0;
1600 unsigned LastOpcode = 0;
1601 unsigned LastBytes = 0;
1602 unsigned NumMove = 0;
1603 for (int i = Ops.size() - 1; i >= 0; --i) {
1604 MachineInstr *Op = Ops[i];
1605 unsigned Loc = MI2LocMap[Op];
1606 if (Loc <= FirstLoc) {
1610 if (Loc >= LastLoc) {
1615 unsigned Opcode = Op->getOpcode();
1616 if (LastOpcode && Opcode != LastOpcode)
1619 int Offset = getMemoryOpOffset(Op);
1620 unsigned Bytes = getLSMultipleTransferSize(Op);
// The run must be contiguous in memory: same size, adjacent offsets.
1622 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1625 LastOffset = Offset;
1627 LastOpcode = Opcode;
1628 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the chosen ops and the registers they transfer for the
// safety/profitability check below.
1635 SmallPtrSet<MachineInstr*, 4> MemOps;
1636 SmallSet<unsigned, 4> MemRegs;
1637 for (int i = NumMove-1; i >= 0; --i) {
1638 MemOps.insert(Ops[i]);
1639 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1642 // Be conservative, if the instructions are too far apart, don't
1643 // move them. We want to limit the increase of register pressure.
1644 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1646 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1647 MemOps, MemRegs, TRI);
1649 for (unsigned i = 0; i != NumMove; ++i)
1652 // This is the new location for the loads / stores.
// Loads gather at the earliest op, stores at the latest; step past any
// of the ops being moved (and debug values) to find the slot.
1653 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1654 while (InsertPos != MBB->end()
1655 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1658 // If we are moving a pair of loads / stores, see if it makes sense
1659 // to try to allocate a pair of registers that can form register pairs.
1660 MachineInstr *Op0 = Ops.back();
1661 MachineInstr *Op1 = Ops[Ops.size()-2];
1662 unsigned EvenReg = 0, OddReg = 0;
1663 unsigned BaseReg = 0, PredReg = 0;
1664 ARMCC::CondCodes Pred = ARMCC::AL;
1666 unsigned NewOpc = 0;
// A pair that satisfies the LDRD/STRD constraints is fused directly.
1669 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1670 EvenReg, OddReg, BaseReg,
1671 Offset, PredReg, Pred, isT2)) {
1675 // Form the pair instruction.
1677 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1678 dl, TII->get(NewOpc))
1679 .addReg(EvenReg, RegState::Define)
1680 .addReg(OddReg, RegState::Define)
1682 // FIXME: We're converting from LDRi12 to an insn that still
1683 // uses addrmode2, so we need an explicit offset reg. It should
1684 // always by reg0 since we're transforming LDRi12s.
1687 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store case: sources instead of defs.
1690 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1691 dl, TII->get(NewOpc))
1695 // FIXME: We're converting from LDRi12 to an insn that still
1696 // uses addrmode2, so we need an explicit offset reg. It should
1697 // always by reg0 since we're transforming STRi12s.
1700 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1706 // Add register allocation hints to form register pairs.
1707 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1708 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// No fused pair: just splice the ops to the insertion point in order.
1710 for (unsigned i = 0; i != NumMove; ++i) {
1711 MachineInstr *Op = Ops.back();
1713 MBB->splice(InsertPos, MBB, Op);
1717 NumLdStMoved += NumMove;
// Scan the block, grouping unpredicated loads and stores by base register
// (stopping at call/terminator barriers and duplicate base+offset pairs),
// then hand each group to RescheduleOps.
1727 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1728 bool RetVal = false;
// Per-region bookkeeping: instruction positions, and loads/stores keyed
// by their base register (bases also kept in discovery order).
1730 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1731 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1732 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1733 SmallVector<unsigned, 4> LdBases;
1734 SmallVector<unsigned, 4> StBases;
1737 MachineBasicBlock::iterator MBBI = MBB->begin();
1738 MachineBasicBlock::iterator E = MBB->end();
1740 for (; MBBI != E; ++MBBI) {
1741 MachineInstr *MI = MBBI;
1742 const TargetInstrDesc &TID = MI->getDesc();
1743 if (TID.isCall() || TID.isTerminator()) {
1744 // Stop at barriers.
// Number every real instruction so RescheduleOps can measure distance.
1749 if (!MI->isDebugValue())
1750 MI2LocMap[MI] = ++Loc;
1752 if (!isMemoryOp(MI))
// Only unpredicated memory ops are considered.
1754 unsigned PredReg = 0;
1755 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1758 int Opc = MI->getOpcode();
1759 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1760 unsigned Base = MI->getOperand(1).getReg();
1761 int Offset = getMemoryOpOffset(MI);
1763 bool StopHere = false;
// Loads: append to the existing group for this base, unless the same
// base+offset was already seen (a duplicate ends the region).
1765 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1766 Base2LdsMap.find(Base);
1767 if (BI != Base2LdsMap.end()) {
1768 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1769 if (Offset == getMemoryOpOffset(BI->second[i])) {
1775 BI->second.push_back(MI);
1777 SmallVector<MachineInstr*, 4> MIs;
1779 Base2LdsMap[Base] = MIs;
1780 LdBases.push_back(Base);
// Stores: same grouping, in their own map.
1783 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1784 Base2StsMap.find(Base);
1785 if (BI != Base2StsMap.end()) {
1786 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1787 if (Offset == getMemoryOpOffset(BI->second[i])) {
1793 BI->second.push_back(MI);
1795 SmallVector<MachineInstr*, 4> MIs;
1797 Base2StsMap[Base] = MIs;
1798 StBases.push_back(Base);
1803 // Found a duplicate (a base+offset combination that's seen earlier).
1810 // Re-schedule loads.
1811 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1812 unsigned Base = LdBases[i];
1813 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1815 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1818 // Re-schedule stores.
1819 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1820 unsigned Base = StBases[i];
1821 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1823 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset the per-region maps before scanning the next region.
1827 Base2LdsMap.clear();
1828 Base2StsMap.clear();
1838 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1839 /// optimization pass.
1840 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
// PreAlloc selects the pre-RA rescheduling pass; otherwise the post-RA
// LDM/STM formation pass is created. Caller owns the returned pass.
1842 return new ARMPreAllocLoadStoreOpt();
1843 return new ARMLoadStoreOpt();