1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMAddressingModes.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "ARMMachineFunctionInfo.h"
20 #include "ARMRegisterInfo.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/Target/TargetData.h"
30 #include "llvm/Target/TargetInstrInfo.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include "llvm/Target/TargetRegisterInfo.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/STLExtras.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallSet.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/ADT/Statistic.h"
// Pass-wide statistics counters (reported with -stats). Each one counts a
// distinct kind of transformation performed by this pass: forming multiple
// loads/stores (ldm/stm/vldm/vstm), moving instructions, and fixing up
// invalid ldrd/strd register pairs by turning them back into ldm/stm or
// into individual ldr/str instructions.
42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
50 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
51 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
52 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
54 /// ARMLoadStoreOpt - Post-register-allocation pass that combines
55 /// load / store instructions to form ldm / stm instructions.
58 struct ARMLoadStoreOpt : public MachineFunctionPass {
60 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Target hooks cached in runOnMachineFunction for use by the helpers below.
62 const TargetInstrInfo *TII;
63 const TargetRegisterInfo *TRI;
68 virtual bool runOnMachineFunction(MachineFunction &Fn);
70 virtual const char *getPassName() const {
71 return "ARM load / store optimization pass";
// MemOpQueueEntry - One candidate load/store being considered for merging:
// its immediate offset, transferred register (with kill state), its ordinal
// position within the basic block, and an iterator to the instruction.
75 struct MemOpQueueEntry {
80 MachineBasicBlock::iterator MBBI;
82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
83 MachineBasicBlock::iterator i)
84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
87 typedef MemOpQueue::iterator MemOpQueueIter;
// Core merging helpers; see the definitions below for their contracts.
89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
90 int Offset, unsigned Base, bool BaseKill, int Opcode,
91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
93 void MergeOpsUpdate(MachineBasicBlock &MBB,
102 ARMCC::CondCodes Pred,
106 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
108 int Opcode, unsigned Size,
109 ARMCC::CondCodes Pred, unsigned PredReg,
110 unsigned Scratch, MemOpQueue &MemOps,
111 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
115 MachineBasicBlock::iterator &MBBI);
116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
117 MachineBasicBlock::iterator MBBI,
118 const TargetInstrInfo *TII,
120 MachineBasicBlock::iterator &I);
121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
122 MachineBasicBlock::iterator MBBI,
124 MachineBasicBlock::iterator &I);
125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Pass identification (address is the unique ID, value is unused).
128 char ARMLoadStoreOpt::ID = 0;
// getLoadStoreMultipleOpcode - Map a single load/store opcode plus an
// addressing submode (ia/ib/da/db) to the matching load/store-multiple
// opcode. Returns 0 for the VFP db cases, where only the base-updating
// (_UPD) variant of the instruction exists; callers must treat 0 as
// "no non-writeback form available".
131 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
133 default: llvm_unreachable("Unhandled opcode!");
137 default: llvm_unreachable("Unhandled submode!");
138 case ARM_AM::ia: return ARM::LDMIA;
139 case ARM_AM::da: return ARM::LDMDA;
140 case ARM_AM::db: return ARM::LDMDB;
141 case ARM_AM::ib: return ARM::LDMIB;
147 default: llvm_unreachable("Unhandled submode!");
148 case ARM_AM::ia: return ARM::STMIA;
149 case ARM_AM::da: return ARM::STMDA;
150 case ARM_AM::db: return ARM::STMDB;
151 case ARM_AM::ib: return ARM::STMIB;
// Thumb2 multiples only support the ia and db submodes.
158 default: llvm_unreachable("Unhandled submode!");
159 case ARM_AM::ia: return ARM::t2LDMIA;
160 case ARM_AM::db: return ARM::t2LDMDB;
167 default: llvm_unreachable("Unhandled submode!");
168 case ARM_AM::ia: return ARM::t2STMIA;
169 case ARM_AM::db: return ARM::t2STMDB;
175 default: llvm_unreachable("Unhandled submode!");
176 case ARM_AM::ia: return ARM::VLDMSIA;
177 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
183 default: llvm_unreachable("Unhandled submode!");
184 case ARM_AM::ia: return ARM::VSTMSIA;
185 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
191 default: llvm_unreachable("Unhandled submode!");
192 case ARM_AM::ia: return ARM::VLDMDIA;
193 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
199 default: llvm_unreachable("Unhandled submode!");
200 case ARM_AM::ia: return ARM::VSTMDIA;
201 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
// getLoadStoreMultipleSubMode - Inverse of the mapping above: recover the
// addressing submode implied by a load/store-multiple opcode (including the
// _UPD / _RET variants). Returns bad_am_submode only as a fallthrough after
// the switch; unhandled opcodes hit the llvm_unreachable default.
212 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
214 default: llvm_unreachable("Unhandled opcode!");
// The opcodes grouped here (ia-family, including writeback forms) all
// encode increment-after addressing.
220 case ARM::t2LDMIA_RET:
222 case ARM::t2LDMIA_UPD:
224 case ARM::t2STMIA_UPD:
226 case ARM::VLDMSIA_UPD:
228 case ARM::VSTMSIA_UPD:
230 case ARM::VLDMDIA_UPD:
232 case ARM::VSTMDIA_UPD:
// db-family writeback opcodes: decrement-before addressing.
246 case ARM::t2LDMDB_UPD:
248 case ARM::t2STMDB_UPD:
249 case ARM::VLDMSDB_UPD:
250 case ARM::VSTMSDB_UPD:
251 case ARM::VLDMDDB_UPD:
252 case ARM::VSTMDDB_UPD:
262 return ARM_AM::bad_am_submode;
265 } // end namespace ARM_AM
266 } // end namespace llvm
// isT2i32Load - True for the Thumb2 32-bit integer loads this pass handles
// (12-bit and 8-bit immediate-offset forms).
268 static bool isT2i32Load(unsigned Opc) {
269 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
// isi32Load - True for any 32-bit integer load handled here: ARM LDRi12 or
// one of the Thumb2 forms above.
272 static bool isi32Load(unsigned Opc) {
273 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
// isT2i32Store - Store counterpart of isT2i32Load.
276 static bool isT2i32Store(unsigned Opc) {
277 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
// isi32Store - Store counterpart of isi32Load.
280 static bool isi32Store(unsigned Opc) {
281 return Opc == ARM::STRi12 || isT2i32Store(Opc);
284 /// MergeOps - Create and insert a LDM or STM with Base as base register and
285 /// registers in Regs as the register operands that would be loaded / stored.
286 /// It returns true if the transformation is done.
/// Offset is the starting immediate offset of the run; Scratch is a spare
/// register (from the scavenger) usable to materialize a new base when the
/// run is a store and the offset cannot be folded into a submode.
288 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
289 MachineBasicBlock::iterator MBBI,
290 int Offset, unsigned Base, bool BaseKill,
291 int Opcode, ARMCC::CondCodes Pred,
292 unsigned PredReg, unsigned Scratch, DebugLoc dl,
293 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
294 // Only a single register to load / store. Don't bother.
295 unsigned NumRegs = Regs.size();
// Pick the addressing submode that absorbs Offset without extra code:
// ia for offset 0, ib for +4, da for -4*(n-1), db for -4*n.
299 ARM_AM::AMSubMode Mode = ARM_AM::ia;
300 // VFP and Thumb2 do not support IB or DA modes.
301 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
302 bool haveIBAndDA = isNotVFP && !isThumb2;
303 if (Offset == 4 && haveIBAndDA)
305 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
307 else if (Offset == -4 * (int)NumRegs && isNotVFP)
308 // VLDM/VSTM do not support DB mode without also updating the base reg.
310 else if (Offset != 0) {
311 // Check if this is a supported opcode before we insert instructions to
312 // calculate a new base register.
313 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
315 // If starting offset isn't zero, insert a MI to materialize a new base.
316 // But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
322 if (isi32Load(Opcode))
323 // If it is a load, then just use one of the destination register to
324 // use as the new base.
325 NewBase = Regs[NumRegs-1].first;
327 // Use the scratch register to use as a new base.
// Compute NewBase = Base +/- Offset with a predicated ADD/SUB; bail out
// if the offset is not encodable as a (T2) shifter-operand immediate.
332 int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
334 BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
337 int ImmedOffset = isThumb2
338 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
339 if (ImmedOffset == -1)
340 // FIXME: Try t2ADDri12 or t2SUBri12?
341 return false; // Probably not worth it then.
343 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
344 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
345 .addImm(Pred).addReg(PredReg).addReg(0);
347 BaseKill = true; // New base is always killed right after its use.
// Loads define their register operands; stores use (and possibly kill) them.
350 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
351 Opcode == ARM::VLDRD);
352 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
353 if (!Opcode) return false;
354 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
355 .addReg(Base, getKillRegState(BaseKill))
356 .addImm(Pred).addReg(PredReg);
357 for (unsigned i = 0; i != NumRegs; ++i)
358 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
359 | getKillRegState(Regs[i].second));
364 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success. The ops in [memOpsBegin, memOpsEnd) are merged into a single
// instruction inserted after memOps[insertAfter]; kill flags are moved from
// any earlier instruction onto the merged one so liveness stays correct.
366 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
368 unsigned memOpsBegin, unsigned memOpsEnd,
369 unsigned insertAfter, int Offset,
370 unsigned Base, bool BaseKill,
372 ARMCC::CondCodes Pred, unsigned PredReg,
375 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
376 // First calculate which of the registers should be killed by the merged
// instruction.
378 const unsigned insertPos = memOps[insertAfter].Position;
379 SmallSet<unsigned, 4> KilledRegs;
380 DenseMap<unsigned, unsigned> Killer;
381 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
382 if (i == memOpsBegin) {
// Record every register killed by a memop that sits before the insertion
// point; Killer remembers which memop holds the kill flag.
387 if (memOps[i].Position < insertPos && memOps[i].isKill) {
388 unsigned Reg = memOps[i].Reg;
389 KilledRegs.insert(Reg);
// Build the register list for the merged instruction.
394 SmallVector<std::pair<unsigned, bool>, 8> Regs;
395 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
396 unsigned Reg = memOps[i].Reg;
397 // If we are inserting the merged operation after an operation that
398 // uses the same register, make sure to transfer any kill flag.
399 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
400 Regs.push_back(std::make_pair(Reg, isKill));
403 // Try to do the merge.
404 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
406 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
407 Pred, PredReg, Scratch, dl, Regs))
410 // Merge succeeded, update records.
411 Merges.push_back(prior(Loc));
412 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
413 // Remove kill flags from any memops that come before insertPos.
414 if (Regs[i-memOpsBegin].second) {
415 unsigned Reg = Regs[i-memOpsBegin].first;
416 if (KilledRegs.count(Reg)) {
417 unsigned j = Killer[Reg];
418 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
419 assert(Idx >= 0 && "Cannot find killing operand");
420 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
421 memOps[j].isKill = false;
423 memOps[i].isKill = true;
// Erase the merged memop and redirect its queue entry at the new
// multiple instruction so later passes over the queue stay valid.
425 MBB.erase(memOps[i].MBBI);
426 // Update this memop to refer to the merged instruction.
427 // We may need to move kill flags again.
428 memOps[i].Merged = true;
429 memOps[i].MBBI = Merges.back();
430 memOps[i].Position = insertPos;
434 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
435 /// load / store multiple instructions.
/// Scans MemOps from SIndex looking for the longest run with ascending
/// register numbers and consecutive offsets; when the run breaks, it merges
/// what it has and recurses on the remainder.
437 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
438 unsigned Base, int Opcode, unsigned Size,
439 ARMCC::CondCodes Pred, unsigned PredReg,
440 unsigned Scratch, MemOpQueue &MemOps,
441 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
442 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
443 int Offset = MemOps[SIndex].Offset;
444 int SOffset = Offset;
445 unsigned insertAfter = SIndex;
446 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
447 DebugLoc dl = Loc->getDebugLoc();
448 const MachineOperand &PMO = Loc->getOperand(0);
449 unsigned PReg = PMO.getReg();
// An undef operand cannot extend a run; UINT_MAX makes every ordering
// comparison against it fail.
450 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
451 : getARMRegisterNumbering(PReg);
453 unsigned Limit = ~0U;
455 // vldm / vstm limit are 32 for S variants, 16 for D variants.
473 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
474 int NewOffset = MemOps[i].Offset;
475 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
476 unsigned Reg = MO.getReg();
477 unsigned RegNum = MO.isUndef() ? UINT_MAX
478 : getARMRegisterNumbering(Reg);
479 // Register numbers must be in ascending order. For VFP / NEON load and
480 // store multiples, the registers must also be consecutive and within the
481 // limit on the number of registers per instruction.
482 if (Reg != ARM::SP &&
483 NewOffset == Offset + (int)Size &&
484 ((isNotVFP && RegNum > PRegNum) ||
485 ((Count < Limit) && RegNum == PRegNum+1))) {
490 // Can't merge this in. Try merge the earlier ones first.
491 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
492 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
493 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
// MemOps, Merges) -- tail of the recursive call elided here.
// Track the latest-positioned memop so the merged instruction is
// inserted after everything it replaces.
498 if (MemOps[i].Position > MemOps[insertAfter].Position)
// Final merge of the remaining run; the base is killed iff the last
// instruction at Loc had a kill of it.
502 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
503 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
504 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
// isMatchingDecrement - Return true if MI is "Base = Base - Bytes" with the
// same predicate, i.e. a SUB immediate (ARM/Thumb1-sp/Thumb2) that can be
// folded into a base-updating load/store. Limit, when nonzero, bounds the
// encodable offset.
508 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
509 unsigned Bytes, unsigned Limit,
510 ARMCC::CondCodes Pred, unsigned PredReg){
511 unsigned MyPredReg = 0;
514 if (MI->getOpcode() != ARM::t2SUBri &&
515 MI->getOpcode() != ARM::tSUBspi &&
516 MI->getOpcode() != ARM::SUBri)
519 // Make sure the offset fits in 8 bits.
520 if (Bytes == 0 || (Limit && Bytes >= Limit))
// tSUBspi encodes its immediate in words, so scale it to bytes.
523 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
524 return (MI->getOperand(0).getReg() == Base &&
525 MI->getOperand(1).getReg() == Base &&
526 (MI->getOperand(2).getImm()*Scale) == Bytes &&
527 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
528 MyPredReg == PredReg);
// isMatchingIncrement - Mirror of isMatchingDecrement: return true if MI is
// "Base = Base + Bytes" (ADD immediate) under the same predicate, eligible
// for folding into a base-updating load/store.
531 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
532 unsigned Bytes, unsigned Limit,
533 ARMCC::CondCodes Pred, unsigned PredReg){
534 unsigned MyPredReg = 0;
537 if (MI->getOpcode() != ARM::t2ADDri &&
538 MI->getOpcode() != ARM::tADDspi &&
539 MI->getOpcode() != ARM::ADDri)
542 if (Bytes == 0 || (Limit && Bytes >= Limit))
543 // Make sure the offset fits in 8 bits.
// tADDspi encodes its immediate in words, so scale it to bytes.
546 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
547 return (MI->getOperand(0).getReg() == Base &&
548 MI->getOperand(1).getReg() == Base &&
549 (MI->getOperand(2).getImm()*Scale) == Bytes &&
550 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
551 MyPredReg == PredReg);
// getLSMultipleTransferSize - Number of bytes transferred by MI.
// For the multiple forms, the register list is the tail of the operand list,
// so its length is NumOperands minus the fixed operands (+1 adjustment);
// NOTE(review): the case labels are elided in this view -- presumably the
// *4 return covers integer/S-register multiples and the *8 return covers
// D-register multiples; confirm against the full switch.
554 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
555 switch (MI->getOpcode()) {
583 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
586 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
// getUpdatingLSMultipleOpcode - Map a (non-writeback) load/store-multiple
// opcode plus a submode to the corresponding base-updating (_UPD) opcode.
// Thumb2 and VFP variants only exist for the ia and db submodes.
590 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
591 ARM_AM::AMSubMode Mode) {
593 default: llvm_unreachable("Unhandled opcode!");
599 default: llvm_unreachable("Unhandled submode!");
600 case ARM_AM::ia: return ARM::LDMIA_UPD;
601 case ARM_AM::ib: return ARM::LDMIB_UPD;
602 case ARM_AM::da: return ARM::LDMDA_UPD;
603 case ARM_AM::db: return ARM::LDMDB_UPD;
611 default: llvm_unreachable("Unhandled submode!");
612 case ARM_AM::ia: return ARM::STMIA_UPD;
613 case ARM_AM::ib: return ARM::STMIB_UPD;
614 case ARM_AM::da: return ARM::STMDA_UPD;
615 case ARM_AM::db: return ARM::STMDB_UPD;
621 default: llvm_unreachable("Unhandled submode!");
622 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
623 case ARM_AM::db: return ARM::t2LDMDB_UPD;
629 default: llvm_unreachable("Unhandled submode!");
630 case ARM_AM::ia: return ARM::t2STMIA_UPD;
631 case ARM_AM::db: return ARM::t2STMDB_UPD;
636 default: llvm_unreachable("Unhandled submode!");
637 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
638 case ARM_AM::db: return ARM::VLDMSDB_UPD;
643 default: llvm_unreachable("Unhandled submode!");
644 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
645 case ARM_AM::db: return ARM::VLDMDDB_UPD;
650 default: llvm_unreachable("Unhandled submode!");
651 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
652 case ARM_AM::db: return ARM::VSTMSDB_UPD;
657 default: llvm_unreachable("Unhandled submode!");
658 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
659 case ARM_AM::db: return ARM::VSTMDDB_UPD;
667 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
668 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
670 /// stmia rn, <ra, rb, rc>
671 /// rn := rn + 4 * 3;
/// =>
673 /// stmia rn!, <ra, rb, rc>
///
675 /// rn := rn - 4 * 3;
676 /// ldmia rn, <ra, rb, rc>
/// =>
678 /// ldmdb rn!, <ra, rb, rc>
679 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
680 MachineBasicBlock::iterator MBBI,
682 MachineBasicBlock::iterator &I) {
683 MachineInstr *MI = MBBI;
// Operand 0 of a load/store-multiple is the base register.
684 unsigned Base = MI->getOperand(0).getReg();
685 bool BaseKill = MI->getOperand(0).isKill();
686 unsigned Bytes = getLSMultipleTransferSize(MI);
687 unsigned PredReg = 0;
688 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
689 int Opcode = MI->getOpcode();
690 DebugLoc dl = MI->getDebugLoc();
692 // Can't use an updating ld/st if the base register is also a dest
693 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
694 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
695 if (MI->getOperand(i).getReg() == Base)
698 bool DoMerge = false;
699 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
701 // Try merging with the previous instruction.
702 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
703 if (MBBI != BeginMBBI) {
704 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over debug values so they don't block the fold.
705 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
707 if (Mode == ARM_AM::ia &&
708 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
// A preceding decrement turns an ia multiple into a db writeback form.
711 } else if (Mode == ARM_AM::ib &&
712 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
720 // Try merging with the next instruction.
721 MachineBasicBlock::iterator EndMBBI = MBB.end();
722 if (!DoMerge && MBBI != EndMBBI) {
723 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
724 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
// A trailing increment/decrement matching the direction of the multiple
// becomes plain writeback with the same submode.
726 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
727 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
729 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
730 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Rebuild the instruction in its _UPD form: writeback base def first,
// then the base use, predicate, and the original register list.
745 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
746 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
747 .addReg(Base, getDefRegState(true)) // WB base register
748 .addReg(Base, getKillRegState(BaseKill))
749 .addImm(Pred).addReg(PredReg);
751 // Transfer the rest of operands.
752 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
753 MIB.addOperand(MI->getOperand(OpNum));
755 // Transfer memoperands.
756 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
// getPreIndexedLoadStoreOpcode - Map a single load/store opcode to its
// pre-indexed (writeback-before) form. VFP single loads/stores have no
// pre-indexed variant, so they map to the one-register _UPD multiple.
762 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
763 ARM_AM::AddrOpc Mode) {
770 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
772 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
774 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
776 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
779 return ARM::t2LDR_PRE;
782 return ARM::t2STR_PRE;
783 default: llvm_unreachable("Unhandled opcode!");
// getPostIndexedLoadStoreOpcode - Map a single load/store opcode to its
// post-indexed (writeback-after) form; VFP cases again fall back to the
// one-register _UPD multiple since no true post-indexed VLDR/VSTR exists.
788 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
789 ARM_AM::AddrOpc Mode) {
792 return ARM::LDR_POST;
794 return ARM::STR_POST;
796 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
798 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
800 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
802 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
805 return ARM::t2LDR_POST;
808 return ARM::t2STR_POST;
809 default: llvm_unreachable("Unhandled opcode!");
814 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
815 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
/// a preceding add/sub becomes a pre-indexed access, a trailing one a
/// post-indexed access. Returns true (and erases the add/sub) on success.
816 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
817 MachineBasicBlock::iterator MBBI,
818 const TargetInstrInfo *TII,
820 MachineBasicBlock::iterator &I) {
821 MachineInstr *MI = MBBI;
// For single loads/stores operand 0 is the data register, operand 1 the base.
822 unsigned Base = MI->getOperand(1).getReg();
823 bool BaseKill = MI->getOperand(1).isKill();
824 unsigned Bytes = getLSMultipleTransferSize(MI);
825 int Opcode = MI->getOpcode();
826 DebugLoc dl = MI->getDebugLoc();
827 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
828 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
829 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset accesses can absorb a base update.
830 if (isi32Load(Opcode) || isi32Store(Opcode))
831 if (MI->getOperand(2).getImm() != 0)
833 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
836 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
837 // Can't do the merge if the destination register is the same as the would-be
838 // writeback register.
839 if (isLd && MI->getOperand(0).getReg() == Base)
842 unsigned PredReg = 0;
843 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
844 bool DoMerge = false;
845 ARM_AM::AddrOpc AddSub = ARM_AM::add;
847 // AM2 - 12 bits, thumb2 - 8 bits.
848 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
850 // Try merging with the previous instruction.
851 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
852 if (MBBI != BeginMBBI) {
853 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip debug values so they don't block the fold.
854 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
856 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
858 AddSub = ARM_AM::sub;
860 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A preceding update makes this a pre-indexed access.
864 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
869 // Try merging with the next instruction.
870 MachineBasicBlock::iterator EndMBBI = MBB.end();
871 if (!DoMerge && MBBI != EndMBBI) {
872 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
873 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
876 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
878 AddSub = ARM_AM::sub;
879 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
// A trailing update makes this a post-indexed access.
883 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
// Encode the update amount: AM2 packs direction+offset into one operand,
// other forms use a signed immediate.
897 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
899 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
902 // VLDM[SD}_UPD, VSTM[SD]_UPD
903 // (There are no base-updating versions of VLDR/VSTR instructions, but the
904 // updating load/store-multiple instructions can be used with only one
// register.)
906 MachineOperand &MO = MI->getOperand(0);
907 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
908 .addReg(Base, getDefRegState(true)) // WB base register
909 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
910 .addImm(Pred).addReg(PredReg)
911 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
912 getKillRegState(MO.isKill())));
915 // LDR_PRE, LDR_POST,
916 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
917 .addReg(Base, RegState::Define)
918 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
920 // t2LDR_PRE, t2LDR_POST
921 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
922 .addReg(Base, RegState::Define)
923 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
925 MachineOperand &MO = MI->getOperand(0);
// STR_PRE, STR_POST (ARM store forms keep the base def implicit in Base).
928 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
929 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
930 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
932 // t2STR_PRE, t2STR_POST
933 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
934 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
935 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
942 /// isMemoryOp - Returns true if instruction is a memory operation that this
943 /// pass is capable of operating on.
944 static bool isMemoryOp(const MachineInstr *MI) {
945 // When no memory operands are present, conservatively assume unaligned,
946 // volatile, unfoldable.
947 if (!MI->hasOneMemOperand())
950 const MachineMemOperand *MMO = *MI->memoperands_begin();
952 // Don't touch volatile memory accesses - we may be changing their order.
953 if (MMO->isVolatile())
956 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
// not, so conservatively require natural word alignment.
958 if (MMO->getAlignment() < 4)
961 // str <undef> could probably be eliminated entirely, but for now we just want
962 // to avoid making a mess of it.
963 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
964 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
965 MI->getOperand(0).isUndef())
968 // Likewise don't mess with references to undefined addresses.
969 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
970 MI->getOperand(1).isUndef())
973 int Opcode = MI->getOpcode();
// Only register-base addressing is mergeable (not PC-relative/constpool).
978 return MI->getOperand(1).isReg();
981 return MI->getOperand(1).isReg();
988 return MI->getOperand(1).isReg();
993 /// AdvanceRS - Advance register scavenger to just before the earliest memory
994 /// op that is being merged.
/// Needed so that any scratch register handed out by RS is live-range-safe
/// at the insertion point of the merged instruction.
995 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
996 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
997 unsigned Position = MemOps[0].Position;
998 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
999 if (MemOps[i].Position < Position) {
1000 Position = MemOps[i].Position;
1001 Loc = MemOps[i].MBBI;
1005 if (Loc != MBB.begin())
1006 RS->forward(prior(Loc));
// getMemoryOpOffset - Decode the signed byte offset of a single load/store.
// Plain i12/i8 immediate forms store the offset directly; AM3 (LDRD/STRD)
// and AM5 (VFP) forms pack magnitude and direction into one operand, with
// AM5 offsets scaled by 4 (word units).
1009 static int getMemoryOpOffset(const MachineInstr *MI) {
1010 int Opcode = MI->getOpcode();
1011 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1012 unsigned NumOperands = MI->getDesc().getNumOperands();
1013 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1015 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1016 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1017 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1018 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1021 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1022 : ARM_AM::getAM5Offset(OffField) * 4;
// Apply the sub direction bit to produce a negative offset.
1024 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1027 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
// InsertLDR_STR - Emit one half of a split LDRD/STRD as a single LDR or STR
// (opcode NewOpc) before MBBI, preserving the register kill/dead/undef
// state of the original paired instruction's operands and its predicate.
// isDef selects the load (def) vs. store (use) operand flavor.
1033 static void InsertLDR_STR(MachineBasicBlock &MBB,
1034 MachineBasicBlock::iterator &MBBI,
1035 int Offset, bool isDef,
1036 DebugLoc dl, unsigned NewOpc,
1037 unsigned Reg, bool RegDeadKill, bool RegUndef,
1038 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1039 bool OffKill, bool OffUndef,
1040 ARMCC::CondCodes Pred, unsigned PredReg,
1041 const TargetInstrInfo *TII, bool isT2) {
// Load form: Reg is a def (possibly dead).
1043 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1045 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1046 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1047 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store form: Reg is a use (possibly killed or undef).
1049 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1051 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1052 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1053 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// FixInvalidRegPairOp - LDRD/STRD on ARM require an (even, even+1) register
// pair. When register allocation produced a pair that violates this,
// rewrite the instruction either as an LDM/STM (ascending regs, zero
// offset) or as two separate LDR/STR instructions. Returns true and
// updates MBBI if a rewrite happened.
1057 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1058 MachineBasicBlock::iterator &MBBI) {
1059 MachineInstr *MI = &*MBBI;
1060 unsigned Opcode = MI->getOpcode();
1061 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1062 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1063 unsigned EvenReg = MI->getOperand(0).getReg();
1064 unsigned OddReg = MI->getOperand(1).getReg();
1065 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1066 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
// Already a legal even/odd pair: nothing to fix.
1067 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
1070 MachineBasicBlock::iterator NewBBI = MBBI;
1071 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1072 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
// Capture operand flags before the original instruction is replaced.
1073 bool EvenDeadKill = isLd ?
1074 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1075 bool EvenUndef = MI->getOperand(0).isUndef();
1076 bool OddDeadKill = isLd ?
1077 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1078 bool OddUndef = MI->getOperand(1).isUndef();
1079 const MachineOperand &BaseOp = MI->getOperand(2);
1080 unsigned BaseReg = BaseOp.getReg();
1081 bool BaseKill = BaseOp.isKill();
1082 bool BaseUndef = BaseOp.isUndef();
// Thumb2 LDRD/STRD has no separate offset-register operand.
1083 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1084 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1085 int OffImm = getMemoryOpOffset(MI);
1086 unsigned PredReg = 0;
1087 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1089 if (OddRegNum > EvenRegNum && OffImm == 0) {
1090 // Ascending register numbers and no offset. It's safe to change it to a
// ldm/stm of the two registers.
1092 unsigned NewOpc = (isLd)
1093 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1094 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
// Load case: both registers become defs.
1096 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1097 .addReg(BaseReg, getKillRegState(BaseKill))
1098 .addImm(Pred).addReg(PredReg)
1099 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1100 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
// Store case: both registers are uses, with kill/undef preserved.
1103 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1104 .addReg(BaseReg, getKillRegState(BaseKill))
1105 .addImm(Pred).addReg(PredReg)
1107 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1109 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1112 NewBBI = llvm::prior(MBBI);
1114 // Split into two instructions.
1115 unsigned NewOpc = (isLd)
1116 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1117 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1118 DebugLoc dl = MBBI->getDebugLoc();
1119 // If this is a load and base register is killed, it may have been
1120 // re-defed by the load, make sure the first load does not clobber it.
// In that case emit the odd (base+4) half first so the even half, which
// may overlap the base register, executes last.
1122 (BaseKill || OffKill) &&
1123 (TRI->regsOverlap(EvenReg, BaseReg))) {
1124 assert(!TRI->regsOverlap(OddReg, BaseReg));
1125 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1126 OddReg, OddDeadKill, false,
1127 BaseReg, false, BaseUndef, false, OffUndef,
1128 Pred, PredReg, TII, isT2);
1129 NewBBI = llvm::prior(MBBI);
1130 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1131 EvenReg, EvenDeadKill, false,
1132 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1133 Pred, PredReg, TII, isT2);
1135 if (OddReg == EvenReg && EvenDeadKill) {
1136 // If the two source operands are the same, the kill marker is
1137 // probably on the first one. e.g.
1138 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1139 EvenDeadKill = false;
// Move the kill to the second (odd) use instead.
1142 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1143 EvenReg, EvenDeadKill, EvenUndef,
1144 BaseReg, false, BaseUndef, false, OffUndef,
1145 Pred, PredReg, TII, isT2);
1146 NewBBI = llvm::prior(MBBI);
1147 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1148 OddReg, OddDeadKill, OddUndef,
1149 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1150 Pred, PredReg, TII, isT2);
1165 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1166 /// ops of the same base and incrementing offset into LDM / STM ops.
// NOTE(review): the leading numbers on each line are original file line
// numbers; the numbering jumps in places, so some statements (braces,
// continues, a few assignments) are elided from this excerpt.
1167 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// State for the chain of memory ops currently being accumulated: base
// register, access size, predicate, and the queue of candidate ops.
1168 unsigned NumMerges = 0;
1169 unsigned NumMemOps = 0;
1171 unsigned CurrBase = 0;
1173 unsigned CurrSize = 0;
1174 ARMCC::CondCodes CurrPred = ARMCC::AL;
1175 unsigned CurrPredReg = 0;
1176 unsigned Position = 0;
1177 SmallVector<MachineBasicBlock::iterator,4> Merges;
// The scavenger is consulted later to find a free scratch register.
1179 RS->enterBasicBlock(&MBB);
1180 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// Give an ldrd/strd with an invalid register pair a chance to be rewritten
// first; FixInvalidRegPairOp may replace the instruction at MBBI.
1182 if (FixInvalidRegPairOp(MBB, MBBI))
1185 bool Advance = false;
1186 bool TryMerge = false;
1187 bool Clobber = false;
1189 bool isMemOp = isMemoryOp(MBBI);
// Decode the candidate load/store: opcode, transfer size, transferred
// register (operand 0), base register (operand 1), predicate and offset.
1191 int Opcode = MBBI->getOpcode();
1192 unsigned Size = getLSMultipleTransferSize(MBBI);
1193 const MachineOperand &MO = MBBI->getOperand(0);
1194 unsigned Reg = MO.getReg();
// A def (load result) never carries a kill flag we care about here.
1195 bool isKill = MO.isDef() ? false : MO.isKill();
1196 unsigned Base = MBBI->getOperand(1).getReg();
1197 unsigned PredReg = 0;
1198 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1199 int Offset = getMemoryOpOffset(MBBI);
1202 // r5 := ldr [r5, #4]
1203 // r6 := ldr [r5, #8]
1205 // The second ldr has effectively broken the chain even though it
1206 // looks like the later ldr(s) use the same base register. Try to
1207 // merge the ldr's so far, including this one. But don't try to
1208 // combine the following ldr(s).
1209 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1210 if (CurrBase == 0 && !Clobber) {
1211 // Start of a new chain.
// NOTE(review): assignments of CurrBase/CurrOpc/CurrSize/CurrPred appear
// to be elided from this excerpt (original lines 1212-1215).
1216 CurrPredReg = PredReg;
1217 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
// Same opcode/base/predicate as the current chain: keep queuing, keeping
// MemOps sorted by offset and rejecting duplicate offsets.
1226 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1227 // No need to match PredReg.
1228 // Continue adding to the queue.
1229 if (Offset > MemOps.back().Offset) {
1230 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1235 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1237 if (Offset < I->Offset) {
1238 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1243 } else if (Offset == I->Offset) {
1244 // Collision! This can't be merged!
1253 if (MBBI->isDebugValue()) {
1256 // Reach the end of the block, try merging the memory instructions.
1258 } else if (Advance) {
1262 // Reach the end of the block, try merging the memory instructions.
// A chain of 2+ ops may become a single LDM/STM.
1268 if (NumMemOps > 1) {
1269 // Try to find a free register to use as a new base in case it's needed.
1270 // First advance to the instruction just before the start of the chain.
1271 AdvanceRS(MBB, MemOps);
1272 // Find a scratch register.
1273 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1274 // Process the load / store instructions.
1275 RS->forward(prior(MBBI));
1279 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1280 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1282 // Try folding preceding/trailing base inc/dec into the generated
1284 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1285 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1287 NumMerges += Merges.size();
1289 // Try folding preceding/trailing base inc/dec into those load/store
1290 // that were not merged to form LDM/STM ops.
1291 for (unsigned i = 0; i != NumMemOps; ++i)
1292 if (!MemOps[i].Merged)
1293 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1296 // RS may be pointing to an instruction that's deleted.
1297 RS->skipTo(prior(MBBI));
1298 } else if (NumMemOps == 1) {
// Single op: no LDM/STM possible, but base inc/dec may still fold in
// (producing a pre/post-indexed load or store).
1299 // Try folding preceding/trailing base inc/dec into the single
1301 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1303 RS->forward(prior(MBBI));
// Reset chain state before scanning for the next chain.
1310 CurrPred = ARMCC::AL;
1317 // If iterator hasn't been advanced and this is not a memory op, skip it.
1318 // It can't start a new chain anyway.
1319 if (!Advance && !isMemOp && MBBI != E) {
1325 return NumMerges > 0;
1328 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1329 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1330 /// directly restore the value of LR into pc.
1331 /// ldmfd sp!, {..., lr}
1334 /// ldmfd sp!, {..., lr}
1337 /// ldmfd sp!, {..., pc}
1338 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1339 if (MBB.empty()) return false;
// Only consider the last real (non-debug) instruction, and only if it is
// one of the recognized return forms and is not the block's sole instruction.
1341 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1342 if (MBBI != MBB.begin() &&
1343 (MBBI->getOpcode() == ARM::BX_RET ||
1344 MBBI->getOpcode() == ARM::tBX_RET ||
1345 MBBI->getOpcode() == ARM::MOVPCLR)) {
// The instruction before the return must be a writeback multiple-load
// whose last register operand is LR (the epilogue "pop {..., lr}").
1346 MachineInstr *PrevMI = prior(MBBI);
1347 unsigned Opcode = PrevMI->getOpcode();
1348 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1349 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1350 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1351 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1352 if (MO.getReg() != ARM::LR)
// Rewrite to the *_RET form; per the assert only the IA (ascending)
// variants are expected to reach this point.
1354 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1355 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1356 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1357 PrevMI->setDesc(TII->get(NewOpc));
// Carry the return's implicit operands over to the merged instruction.
// NOTE(review): the LR->PC operand rewrite and the erase of the return
// instruction appear to be elided from this excerpt.
1359 PrevMI->copyImplicitOps(&*MBBI);
// Pass entry point: cache target hooks and per-function ARM info, then run
// the two post-RA transformations over every basic block in the function.
1367 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1368 const TargetMachine &TM = Fn.getTarget();
1369 AFI = Fn.getInfo<ARMFunctionInfo>();
1370 TII = TM.getInstrInfo();
1371 TRI = TM.getRegisterInfo();
// Scavenger used by LoadStoreMultipleOpti to find scratch registers.
// NOTE(review): the matching `delete RS` is not visible in this excerpt —
// presumably at the (elided) end of the function; verify against full file.
1372 RS = new RegScavenger();
1373 isThumb2 = AFI->isThumb2Function();
1375 bool Modified = false;
1376 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1378 MachineBasicBlock &MBB = *MFI;
1379 Modified |= LoadStoreMultipleOpti(MBB);
// Loading directly into PC (load-return) requires ARMv5T or later.
1380 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1381 Modified |= MergeReturnIntoLDM(MBB);
1389 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1390 /// load / stores from consecutive locations close to make it more
1391 /// likely they will be combined later.
1394 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1396 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target/function context, populated in runOnMachineFunction.
1398 const TargetData *TD;
1399 const TargetInstrInfo *TII;
1400 const TargetRegisterInfo *TRI;
1401 const ARMSubtarget *STI;
1402 MachineRegisterInfo *MRI;
1403 MachineFunction *MF;
1405 virtual bool runOnMachineFunction(MachineFunction &Fn);
1407 virtual const char *getPassName() const {
1408 return "ARM pre- register allocation load / store optimization pass";
// True (with out-params filled in) if Op0/Op1 can be combined into one
// LDRD/STRD; see the definition for the alignment/offset constraints.
1412 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1413 unsigned &NewOpc, unsigned &EvenReg,
1414 unsigned &OddReg, unsigned &BaseReg,
1416 unsigned &PredReg, ARMCC::CondCodes &Pred,
// Moves consecutive same-base loads (or stores) next to each other.
1418 bool RescheduleOps(MachineBasicBlock *MBB,
1419 SmallVector<MachineInstr*, 4> &Ops,
1420 unsigned Base, bool isLd,
1421 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1422 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
// Pass identification; the address of ID uniquely identifies the pass.
1424 char ARMPreAllocLoadStoreOpt::ID = 0;
// Pre-RA pass entry point: cache target data / instr / register info and
// the subtarget, then reschedule loads and stores in every basic block.
1427 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1428 TD = Fn.getTarget().getTargetData();
1429 TII = Fn.getTarget().getInstrInfo();
1430 TRI = Fn.getTarget().getRegisterInfo();
1431 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1432 MRI = &Fn.getRegInfo();
1435 bool Modified = false;
1436 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1438 Modified |= RescheduleLoadStoreInstrs(MFI);
// Decides whether the group of memory ops in MemOps (transferring the
// registers in MemRegs, all off base register Base) can be moved together
// across the instructions in [I, E) without changing behavior, and whether
// doing so is worth the estimated register-pressure increase.
1443 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1444 MachineBasicBlock::iterator I,
1445 MachineBasicBlock::iterator E,
1446 SmallPtrSet<MachineInstr*, 4> &MemOps,
1447 SmallSet<unsigned, 4> &MemRegs,
1448 const TargetRegisterInfo *TRI) {
1449 // Are there stores / loads / calls between them?
1450 // FIXME: This is overly conservative. We should make use of alias information
1452 SmallSet<unsigned, 4> AddedRegPressure;
// Skip debug values and the group's own members.
1454 if (I->isDebugValue() || MemOps.count(&*I))
1456 const MCInstrDesc &MCID = I->getDesc();
// Calls, terminators and side-effecting instructions act as barriers.
1457 if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
// Without alias info, moving loads across an intervening store is unsafe.
1459 if (isLd && MCID.mayStore())
1464 // It's not safe to move the first 'str' down.
1467 // str r4, [r0, #+4]
1468 if (MCID.mayStore())
// A def of (anything overlapping) the base register blocks the move;
// other defs only contribute to the pressure estimate.
1471 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1472 MachineOperand &MO = I->getOperand(j);
1475 unsigned Reg = MO.getReg();
1476 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1478 if (Reg != Base && !MemRegs.count(Reg))
1479 AddedRegPressure.insert(Reg);
1483 // Estimate register pressure increase due to the transformation.
1484 if (MemRegs.size() <= 4)
1485 // Ok if we are moving small number of instructions.
// Larger groups: tolerate at most 2x the group's own register count.
1487 return AddedRegPressure.size() <= MemRegs.size() * 2;
// Checks whether the adjacent pair Op0/Op1 can be merged into a single
// LDRD/STRD (or t2LDRDi8/t2STRDi8); on success fills in the new opcode,
// register pair, base, encoded offset, predicate and debug location.
// NOTE(review): the return type and some early-exit bodies are elided from
// this excerpt (original line numbering jumps).
1491 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1493 unsigned &NewOpc, unsigned &EvenReg,
1494 unsigned &OddReg, unsigned &BaseReg,
1495 int &Offset, unsigned &PredReg,
1496 ARMCC::CondCodes &Pred,
1498 // Make sure we're allowed to generate LDRD/STRD.
1499 if (!STI->hasV5TEOps())
1502 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
// Map the first instruction's opcode to its paired doubleword form.
1504 unsigned Opcode = Op0->getOpcode();
1505 if (Opcode == ARM::LDRi12)
1507 else if (Opcode == ARM::STRi12)
1509 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1510 NewOpc = ARM::t2LDRDi8;
1513 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1514 NewOpc = ARM::t2STRDi8;
1520 // Make sure the base address satisfies i64 ld / st alignment requirement.
// Reject ops without a single known, non-volatile memory operand: neither
// alignment nor aliasing can be reasoned about for those.
1521 if (!Op0->hasOneMemOperand() ||
1522 !(*Op0->memoperands_begin())->getValue() ||
1523 (*Op0->memoperands_begin())->isVolatile())
1526 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1527 const Function *Func = MF->getFunction();
1528 unsigned ReqAlign = STI->hasV6Ops()
1529 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1530 : 8; // Pre-v6 need 8-byte align
1531 if (Align < ReqAlign)
1534 // Then make sure the immediate offset fits.
1535 int OffImm = getMemoryOpOffset(Op0);
// Presumably the Thumb2 branch: signed 8-bit scaled immediate range.
// TODO(review): the branch condition is elided from this excerpt — confirm.
1537 int Limit = (1 << 8) * Scale;
1538 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
// ARM addrmode3 path: the sign is encoded separately via AddSub.
1542 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1544 AddSub = ARM_AM::sub;
1547 int Limit = (1 << 8) * Scale;
1548 if (OffImm >= Limit || (OffImm & (Scale-1)))
1550 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
// The two transferred registers must differ to form a valid pair.
1552 EvenReg = Op0->getOperand(0).getReg();
1553 OddReg = Op1->getOperand(0).getReg();
1554 if (EvenReg == OddReg)
1556 BaseReg = Op0->getOperand(1).getReg();
1557 Pred = llvm::getInstrPredicate(Op0, PredReg);
1558 dl = Op0->getDebugLoc();
// Comparator ordering memory instructions by their memory-operand offset,
// descending (largest offset first). Equal offsets are asserted not to
// occur for distinct instructions, keeping the ordering strict-weak.
1563 struct OffsetCompare {
1564 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1565 int LOffset = getMemoryOpOffset(LHS);
1566 int ROffset = getMemoryOpOffset(RHS);
1567 assert(LHS == RHS || LOffset != ROffset);
1568 return LOffset > ROffset;
// Given all loads (or all stores, per isLd) off the same base register in
// Ops, finds runs with consecutive offsets and moves them next to each
// other (loads up to the first one, stores down to the last one), and for
// pairs additionally tries to form a single LDRD/STRD. MI2LocMap gives each
// instruction's position in the block. Returns true if anything moved.
1573 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1574 SmallVector<MachineInstr*, 4> &Ops,
1575 unsigned Base, bool isLd,
1576 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1577 bool RetVal = false;
1579 // Sort by offset (in reverse order).
1580 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1582 // The loads / stores of the same base are in order. Scan them from first to
1583 // last and check for the following:
1584 // 1. Any def of base.
1586 while (Ops.size() > 1) {
// Track the span of block positions covered by the candidate run.
1587 unsigned FirstLoc = ~0U;
1588 unsigned LastLoc = 0;
1589 MachineInstr *FirstOp = 0;
1590 MachineInstr *LastOp = 0;
1592 unsigned LastOpcode = 0;
1593 unsigned LastBytes = 0;
1594 unsigned NumMove = 0;
// Ops is sorted by descending offset, so iterating backwards visits
// ascending offsets; grow the run while opcode matches and offsets are
// exactly consecutive (previous offset + transfer size).
1595 for (int i = Ops.size() - 1; i >= 0; --i) {
1596 MachineInstr *Op = Ops[i];
1597 unsigned Loc = MI2LocMap[Op];
1598 if (Loc <= FirstLoc) {
1602 if (Loc >= LastLoc) {
1607 unsigned Opcode = Op->getOpcode();
1608 if (LastOpcode && Opcode != LastOpcode)
1611 int Offset = getMemoryOpOffset(Op);
1612 unsigned Bytes = getLSMultipleTransferSize(Op);
1614 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1617 LastOffset = Offset;
1619 LastOpcode = Opcode;
1620 if (++NumMove == 8) // FIXME: Tune this limit.
// Collect the run's instructions and transferred registers for the
// safety/profitability check.
1627 SmallPtrSet<MachineInstr*, 4> MemOps;
1628 SmallSet<unsigned, 4> MemRegs;
1629 for (int i = NumMove-1; i >= 0; --i) {
1630 MemOps.insert(Ops[i]);
1631 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1634 // Be conservative, if the instructions are too far apart, don't
1635 // move them. We want to limit the increase of register pressure.
1636 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1638 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1639 MemOps, MemRegs, TRI);
1641 for (unsigned i = 0; i != NumMove; ++i)
1644 // This is the new location for the loads / stores.
// Loads cluster at the first op, stores at the last; skip over group
// members and debug values to find the actual insertion point.
1645 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1646 while (InsertPos != MBB->end()
1647 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1650 // If we are moving a pair of loads / stores, see if it makes sense
1651 // to try to allocate a pair of registers that can form register pairs.
1652 MachineInstr *Op0 = Ops.back();
1653 MachineInstr *Op1 = Ops[Ops.size()-2];
1654 unsigned EvenReg = 0, OddReg = 0;
1655 unsigned BaseReg = 0, PredReg = 0;
1656 ARMCC::CondCodes Pred = ARMCC::AL;
1658 unsigned NewOpc = 0;
1661 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1662 EvenReg, OddReg, BaseReg,
1663 Offset, PredReg, Pred, isT2)) {
// Constrain both virtual registers to the class the paired opcode
// requires before building the LDRD/STRD.
1667 const MCInstrDesc &MCID = TII->get(NewOpc);
1668 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
1669 MRI->constrainRegClass(EvenReg, TRC);
1670 MRI->constrainRegClass(OddReg, TRC);
1672 // Form the pair instruction.
1674 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1675 .addReg(EvenReg, RegState::Define)
1676 .addReg(OddReg, RegState::Define)
1678 // FIXME: We're converting from LDRi12 to an insn that still
1679 // uses addrmode2, so we need an explicit offset reg. It should
1680 // always by reg0 since we're transforming LDRi12s.
1683 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// Store path: same construction, sources instead of defs.
1686 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1690 // FIXME: We're converting from LDRi12 to an insn that still
1691 // uses addrmode2, so we need an explicit offset reg. It should
1692 // always by reg0 since we're transforming STRi12s.
1695 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1701 // Add register allocation hints to form register pairs.
1702 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1703 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
// No LDRD/STRD formed: just splice each op to the insertion point.
1705 for (unsigned i = 0; i != NumMove; ++i) {
1706 MachineInstr *Op = Ops.back();
1708 MBB->splice(InsertPos, MBB, Op);
1712 NumLdStMoved += NumMove;
// Walks one basic block, grouping unpredicated i32/VFP loads and stores by
// base register (Base2LdsMap/Base2StsMap) and numbering instructions
// (MI2LocMap), then calls RescheduleOps on each group. A region ends at
// calls/terminators or when a base+offset combination repeats.
1722 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1723 bool RetVal = false;
1725 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1726 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1727 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1728 SmallVector<unsigned, 4> LdBases;
1729 SmallVector<unsigned, 4> StBases;
1732 MachineBasicBlock::iterator MBBI = MBB->begin();
1733 MachineBasicBlock::iterator E = MBB->end();
1735 for (; MBBI != E; ++MBBI) {
1736 MachineInstr *MI = MBBI;
1737 const MCInstrDesc &MCID = MI->getDesc();
1738 if (MCID.isCall() || MCID.isTerminator()) {
1739 // Stop at barriers.
// Number every real instruction so RescheduleOps can measure distances.
1744 if (!MI->isDebugValue())
1745 MI2LocMap[MI] = ++Loc;
1747 if (!isMemoryOp(MI))
// Only unpredicated memory ops are considered.
1749 unsigned PredReg = 0;
1750 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1753 int Opc = MI->getOpcode();
1754 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1755 unsigned Base = MI->getOperand(1).getReg();
1756 int Offset = getMemoryOpOffset(MI);
1758 bool StopHere = false;
// Loads: append to this base's list unless the same offset was already
// seen (a repeat ends the current region — see StopHere handling below).
1760 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1761 Base2LdsMap.find(Base);
1762 if (BI != Base2LdsMap.end()) {
1763 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1764 if (Offset == getMemoryOpOffset(BI->second[i])) {
1770 BI->second.push_back(MI);
1772 SmallVector<MachineInstr*, 4> MIs;
1774 Base2LdsMap[Base] = MIs;
1775 LdBases.push_back(Base);
// Stores: same bookkeeping against Base2StsMap.
1778 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1779 Base2StsMap.find(Base);
1780 if (BI != Base2StsMap.end()) {
1781 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1782 if (Offset == getMemoryOpOffset(BI->second[i])) {
1788 BI->second.push_back(MI);
1790 SmallVector<MachineInstr*, 4> MIs;
1792 Base2StsMap[Base] = MIs;
1793 StBases.push_back(Base);
1798 // Found a duplicate (a base+offset combination that's seen earlier).
1805 // Re-schedule loads.
1806 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1807 unsigned Base = LdBases[i];
1808 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1810 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1813 // Re-schedule stores.
1814 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1815 unsigned Base = StBases[i];
1816 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1818 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
// Reset the per-region maps before processing the next region.
1822 Base2LdsMap.clear();
1823 Base2StsMap.clear();
1833 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1834 /// optimization pass.
// Factory: the pre-RA rescheduling pass when PreAlloc is set, otherwise the
// post-RA LDM/STM-forming pass.
// NOTE(review): the `if (PreAlloc)` guard (original line 1836) is elided
// from this excerpt; the two returns below are its then/else arms.
1835 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1837 return new ARMPreAllocLoadStoreOpt();
1838 return new ARMLoadStoreOpt();