//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &memOps,
                        unsigned memOpsBegin, unsigned memOpsEnd,
                        unsigned insertAfter, int Offset,
                        unsigned Base, bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred, unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  return ARM_AM::bad_am_submode;
}

  } // end namespace ARM_AM
} // end namespace llvm
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
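/// For example (an illustrative sketch; the actual registers and offsets come
/// from the queue being merged):
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// =>
///   ldmia r4, {r0, r1, r2}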
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // New base is always killed right at its use.
  }
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
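/// Illustrative sketch: register numbers must ascend along with the offsets,
/// so in
///   ldr r3, [r6]
///   ldr r2, [r6, #4]
///   ldr r5, [r6, #8]
/// the r2 entry breaks the run; the prefix is merged via MergeOpsUpdate and
/// MergeLDR_STR then recurses on the remainder starting at the break.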
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for the S variants, 16 for the D variants.
  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
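
// Together, these two helpers recognize base-register adjustments that the
// merge routines below can fold into a writeback form, e.g. (illustrative):
//   ldmia r4, {r0, r1, r2}
//   add   r4, r4, #12
// =>
//   ldmia r4!, {r0, r1, r2}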
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
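///
/// Illustrative sketches of the two folds:
///   ldr r0, [r4]       =>  ldr r0, [r4], #4     (trailing add, post-indexed)
///   add r4, r4, #4
///
///   sub r4, r4, #4     =>  ldr r0, [r4, #-4]!   (preceding sub, pre-indexed)
///   ldr r0, [r4]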
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg &&
      STI->getCPUString() == "cortex-m3";
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
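
    // Two repair strategies follow (illustrative):
    //   ldrd r0, r1, [r2]      =>  ldmia r2, {r0, r1}  (regs ascend, offset 0)
    //   ldrd r1, r0, [r2, #8]  =>  ldr r1, [r2, #8]
    //                              ldr r0, [r2, #12]   (otherwise, split)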
    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
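/// A sketch of the per-block scan (illustrative): memory ops off the same base
/// with consecutive offsets are queued up, then each run is merged at once:
///   str r0, [r4]
///   str r1, [r4, #4]   =>   stmia r4, {r0, r1, r2}
///   str r2, [r4, #8]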
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
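/// For example (an illustrative sketch), two loads separated by unrelated
/// code:
///   ldr r0, [r6]
///   add r9, r9, #1
///   ldr r1, [r6, #4]
/// are moved next to each other, so that LDRD formation below or the post-RA
/// pass above can combine them.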
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const MCInstrDesc &MCID = I->getDesc();
    if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && MCID.mayStore())
      return false;
    if (!isLd) {
      if (MCID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (MCID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
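
// An informal reading of the heuristic above (not a guarantee): runs touching
// at most four distinct data registers are always considered profitable;
// larger runs are rejected when the instructions being hoisted over reference
// more than twice as many other live registers as the run itself uses.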
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
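  // Worked example of the limits above: with the Thumb2 encodings Scale == 4,
  // so Limit == (1 << 8) * 4 == 1024 and the offset must be a multiple of 4
  // strictly inside (-1024, 1024). For ARM LDRD/STRD, Scale == 1, so the
  // magnitude must fit in 8 bits (0..255), with the sign carried separately
  // by ARM_AM::getAM3Opc.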
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const MCInstrDesc &MCID = MI->getDesc();
      if (MCID.isCall() || MCID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}