//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"

#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &memOps,
                        unsigned memOpsBegin, unsigned memOpsEnd,
                        unsigned insertAfter, int Offset,
                        unsigned Base, bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred, unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
    break;
  }
  return 0;
}

namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  return ARM_AM::bad_am_submode;
}

  } // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
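///
/// For example (illustrative only; registers and offsets are hypothetical),
/// with Base = r4, Offset = 0, and Regs = {r0, r1, r2}, merging three loads
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// yields
///   ldmia r4, {r0, r1, r2}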
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true; // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - Call MergeOps and update MemOps and merges accordingly on
// success.
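//
// For example (hypothetical registers): when the merged STM is inserted at
// the position of the second store below,
//   str r1, [r4]
//   str r2, [r4, #4]   <- r2 killed here; STM inserted here
// the kill flag on r2 is transferred to the STM's register operand, and any
// kill of the same register before the insertion point is cleared.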
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or
/// more load / store multiple instructions.
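///
/// For example (illustrative): with memops at offsets 0, 4 and 12 off r4,
/// the contiguous run {0, 4} is merged into one ldmia and the routine then
/// recurses on the remaining op at offset 12:
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #12]
/// =>
///   ldmia r4, {r0, r1}
///   ldr r2, [r4, #12]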
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
                                   : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
                                   : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

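// isMatchingDecrement / isMatchingIncrement (below) recognize a SUB / ADD
// that rewrites Base in place by exactly Bytes under the same predicate;
// e.g. (illustrative) "sub r4, r4, #8" matches Base = r4, Bytes = 8.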
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

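// getLSMultipleTransferSize (below) derives the transfer size of a load /
// store multiple from the length of its variadic register list: the operands
// beyond those in the static instruction descriptor, plus one, times the
// register width. E.g. (illustrative) "ldmia r0, {r1, r2, r3}" transfers
// 3 * 4 = 12 bytes.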
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
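///
/// For example (illustrative):
///   ldr r0, [r4]
///   add r4, r4, #4
/// =>
///   ldr r0, [r4], #4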
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
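    // (Illustrative: on Cortex-M3, "ldrd r0, r1, [r0]" has its base register
    // in the destination list, so it is conservatively rewritten via the
    // LDM / split paths below rather than left as an LDRD.)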
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm / stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
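///
/// For example (illustrative):
///   ldr r0, [r5]
///   ldr r1, [r5, #4]
///   ldr r2, [r5, #8]
/// =>
///   ldmia r5, {r0, r1, r2}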
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,
                                         Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}

/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.
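///
/// For example (illustrative), with an unrelated instruction in between:
///   ldr r0, [r4]
///   add r7, r7, #1
///   ldr r1, [r4, #4]
/// the two loads are moved next to each other so a later pass can combine
/// them into an ldrd or ldm.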
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const MCInstrDesc &MCID = I->getDesc();
    if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && MCID.mayStore())
      return false;
    if (!isLd) {
      if (MCID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (MCID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

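// CanFormLdStDWord (below) decides whether a pair of loads / stores can be
// combined into a single LDRD / STRD. For example (illustrative), "ldr r0,
// [r2]" followed by "ldr r1, [r2, #4]" may become "ldrd r0, r1, [r2]",
// provided the target has ARMv5TE, the address is sufficiently aligned, and
// the offset fits the encoding.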
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

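// RescheduleOps (below) sorts the loads / stores of one base register by
// offset and moves runs with consecutive offsets next to each other; e.g.
// (illustrative) a pair at offsets 0 and 4 that survives the safety checks
// may additionally be rewritten into an LDRD / STRD via CanFormLdStDWord.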
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                      SmallVector<MachineInstr*, 4> &Ops,
                                      unsigned Base, bool isLd,
                                      DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const MCInstrDesc &MCID = MI->getDesc();
      if (MCID.isCall() || MCID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}