1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMRegisterInfo.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/CodeGen/SelectionDAGNodes.h"
30 #include "llvm/Target/TargetData.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/SmallSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/Statistic.h"
45 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
46 STATISTIC(NumSTMGened , "Number of stm instructions generated");
47 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
48 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
49 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
50 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
51 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
52 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
53 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
54 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
55 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
// NOTE(review): this listing is truncated — each surviving line carries its
// original line number, and several lines (the enclosing anonymous namespace,
// `static char ID;`, struct/namespace closers) were dropped. Code reproduced
// verbatim below; do not treat it as compilable as-is.
// ARMLoadStoreOpt: post-register-allocation MachineFunctionPass that merges
// load/store instructions into load/store-multiple instructions.
61 struct ARMLoadStoreOpt : public MachineFunctionPass {
63 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
// Cached target hooks, set up in runOnMachineFunction (not visible here).
65 const TargetInstrInfo *TII;
66 const TargetRegisterInfo *TRI;
67 const ARMSubtarget *STI;
72 virtual bool runOnMachineFunction(MachineFunction &Fn);
74 virtual const char *getPassName() const {
75 return "ARM load / store optimization pass";
// MemOpQueueEntry: one candidate load/store — its immediate offset, the
// transferred register (with kill flag), its position in the block, and an
// iterator to the instruction. `Merged` is set once folded into a multiple.
79 struct MemOpQueueEntry {
84 MachineBasicBlock::iterator MBBI;
86 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
87 MachineBasicBlock::iterator i)
88 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
90 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
91 typedef MemOpQueue::iterator MemOpQueueIter;
// Worker declarations; bodies appear later in this file.
93 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
94 int Offset, unsigned Base, bool BaseKill, int Opcode,
95 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
96 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
97 void MergeOpsUpdate(MachineBasicBlock &MBB,
101 unsigned insertAfter,
106 ARMCC::CondCodes Pred,
110 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
111 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
112 int Opcode, unsigned Size,
113 ARMCC::CondCodes Pred, unsigned PredReg,
114 unsigned Scratch, MemOpQueue &MemOps,
115 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
117 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
118 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
119 MachineBasicBlock::iterator &MBBI);
120 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
121 MachineBasicBlock::iterator MBBI,
122 const TargetInstrInfo *TII,
124 MachineBasicBlock::iterator &I);
125 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
126 MachineBasicBlock::iterator MBBI,
128 MachineBasicBlock::iterator &I);
129 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
130 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
// Out-of-line definition of the pass ID used for pass identification.
132 char ARMLoadStoreOpt::ID = 0;
135 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
137 default: llvm_unreachable("Unhandled opcode!");
141 default: llvm_unreachable("Unhandled submode!");
142 case ARM_AM::ia: return ARM::LDMIA;
143 case ARM_AM::da: return ARM::LDMDA;
144 case ARM_AM::db: return ARM::LDMDB;
145 case ARM_AM::ib: return ARM::LDMIB;
150 default: llvm_unreachable("Unhandled submode!");
151 case ARM_AM::ia: return ARM::STMIA;
152 case ARM_AM::da: return ARM::STMDA;
153 case ARM_AM::db: return ARM::STMDB;
154 case ARM_AM::ib: return ARM::STMIB;
160 default: llvm_unreachable("Unhandled submode!");
161 case ARM_AM::ia: return ARM::t2LDMIA;
162 case ARM_AM::db: return ARM::t2LDMDB;
168 default: llvm_unreachable("Unhandled submode!");
169 case ARM_AM::ia: return ARM::t2STMIA;
170 case ARM_AM::db: return ARM::t2STMDB;
175 default: llvm_unreachable("Unhandled submode!");
176 case ARM_AM::ia: return ARM::VLDMSIA;
177 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
182 default: llvm_unreachable("Unhandled submode!");
183 case ARM_AM::ia: return ARM::VSTMSIA;
184 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
189 default: llvm_unreachable("Unhandled submode!");
190 case ARM_AM::ia: return ARM::VLDMDIA;
191 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
196 default: llvm_unreachable("Unhandled submode!");
197 case ARM_AM::ia: return ARM::VSTMDIA;
198 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
// NOTE(review): truncated listing — the `namespace llvm { namespace ARM_AM {`
// openers that match the closers below, the `switch (Opcode) {` line, most of
// the case labels, and the `return ARM_AM::ia/da/db/ib;` statements were
// dropped. Reproduced verbatim; restore from upstream before compiling.
// Purpose (per surviving labels): map a load/store-multiple opcode back to its
// addressing submode — the *IA* opcodes here return ia, the *DB* ones db, etc.
206 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
208 default: llvm_unreachable("Unhandled opcode!");
214 case ARM::t2LDMIA_RET:
216 case ARM::t2LDMIA_UPD:
218 case ARM::t2STMIA_UPD:
220 case ARM::VLDMSIA_UPD:
222 case ARM::VSTMSIA_UPD:
224 case ARM::VLDMDIA_UPD:
226 case ARM::VSTMDIA_UPD:
240 case ARM::t2LDMDB_UPD:
242 case ARM::t2STMDB_UPD:
243 case ARM::VLDMSDB_UPD:
244 case ARM::VSTMSDB_UPD:
245 case ARM::VLDMDDB_UPD:
246 case ARM::VSTMDDB_UPD:
257 } // end namespace ARM_AM
258 } // end namespace llvm
260 static bool isT2i32Load(unsigned Opc) {
261 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
264 static bool isi32Load(unsigned Opc) {
265 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
268 static bool isT2i32Store(unsigned Opc) {
269 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
272 static bool isi32Store(unsigned Opc) {
273 return Opc == ARM::STRi12 || isT2i32Store(Opc);
276 /// MergeOps - Create and insert a LDM or STM with Base as base register and
277 /// registers in Regs as the register operands that would be loaded / stored.
278 /// It returns true if the transformation is done.
280 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
281 MachineBasicBlock::iterator MBBI,
282 int Offset, unsigned Base, bool BaseKill,
283 int Opcode, ARMCC::CondCodes Pred,
284 unsigned PredReg, unsigned Scratch, DebugLoc dl,
285 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
286 // Only a single register to load / store. Don't bother.
287 unsigned NumRegs = Regs.size();
291 ARM_AM::AMSubMode Mode = ARM_AM::ia;
292 // VFP and Thumb2 do not support IB or DA modes.
293 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
294 bool haveIBAndDA = isNotVFP && !isThumb2;
295 if (Offset == 4 && haveIBAndDA)
297 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
299 else if (Offset == -4 * (int)NumRegs && isNotVFP)
300 // VLDM/VSTM do not support DB mode without also updating the base reg.
302 else if (Offset != 0) {
303 // Check if this is a supported opcode before we insert instructions to
304 // calculate a new base register.
305 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
307 // If starting offset isn't zero, insert a MI to materialize a new base.
308 // But only do so if it is cost effective, i.e. merging more than two
314 if (isi32Load(Opcode))
315 // If it is a load, then just use one of the destination register to
316 // use as the new base.
317 NewBase = Regs[NumRegs-1].first;
319 // Use the scratch register to use as a new base.
324 int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
326 BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
329 int ImmedOffset = isThumb2
330 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
331 if (ImmedOffset == -1)
332 // FIXME: Try t2ADDri12 or t2SUBri12?
333 return false; // Probably not worth it then.
335 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
336 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
337 .addImm(Pred).addReg(PredReg).addReg(0);
339 BaseKill = true; // New base is always killed right its use.
342 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
343 Opcode == ARM::VLDRD);
344 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
345 if (!Opcode) return false;
346 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
347 .addReg(Base, getKillRegState(BaseKill))
348 .addImm(Pred).addReg(PredReg);
349 for (unsigned i = 0; i != NumRegs; ++i)
350 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
351 | getKillRegState(Regs[i].second));
// NOTE(review): truncated listing — parameter lines, several conditions,
// `else` branches, and closing braces were dropped from this function.
// Reproduced verbatim below; the kill-flag bookkeeping is order-sensitive, so
// restore the missing lines from upstream rather than guessing.
// Purpose: call MergeOps over memOps[memOpsBegin, memOpsEnd), and on success
// erase the merged instructions, record the new multiple in Merges, and
// migrate kill flags that would otherwise end up before the insertion point.
356 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
358 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
360 unsigned memOpsBegin, unsigned memOpsEnd,
361 unsigned insertAfter, int Offset,
362 unsigned Base, bool BaseKill,
364 ARMCC::CondCodes Pred, unsigned PredReg,
367 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
368 // First calculate which of the registers should be killed by the merged
370 const unsigned insertPos = memOps[insertAfter].Position;
371 SmallSet<unsigned, 4> KilledRegs;
372 DenseMap<unsigned, unsigned> Killer;
373 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
374 if (i == memOpsBegin) {
379 if (memOps[i].Position < insertPos && memOps[i].isKill) {
380 unsigned Reg = memOps[i].Reg;
381 KilledRegs.insert(Reg);
// Build the (reg, isKill) list handed to MergeOps.
386 SmallVector<std::pair<unsigned, bool>, 8> Regs;
387 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
388 unsigned Reg = memOps[i].Reg;
389 // If we are inserting the merged operation after an operation that
390 // uses the same register, make sure to transfer any kill flag.
391 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
392 Regs.push_back(std::make_pair(Reg, isKill));
395 // Try to do the merge.
396 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
398 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
399 Pred, PredReg, Scratch, dl, Regs))
402 // Merge succeeded, update records.
403 Merges.push_back(prior(Loc));
404 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
405 // Remove kill flags from any memops that come before insertPos.
406 if (Regs[i-memOpsBegin].second) {
407 unsigned Reg = Regs[i-memOpsBegin].first;
408 if (KilledRegs.count(Reg)) {
409 unsigned j = Killer[Reg];
410 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
411 assert(Idx >= 0 && "Cannot find killing operand");
412 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
413 memOps[j].isKill = false;
415 memOps[i].isKill = true;
417 MBB.erase(memOps[i].MBBI);
418 // Update this memop to refer to the merged instruction.
419 // We may need to move kill flags again.
420 memOps[i].Merged = true;
421 memOps[i].MBBI = Merges.back();
422 memOps[i].Position = insertPos;
// NOTE(review): truncated listing — the `bool` return-type line, the VFP
// register-count limit setup, the `Count` accumulation in the loop, and
// several closers were dropped. Reproduced verbatim; restore from upstream.
426 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
427 /// load / store multiple instructions.
429 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
430 unsigned Base, int Opcode, unsigned Size,
431 ARMCC::CondCodes Pred, unsigned PredReg,
432 unsigned Scratch, MemOpQueue &MemOps,
433 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
434 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
435 int Offset = MemOps[SIndex].Offset;
436 int SOffset = Offset;
437 unsigned insertAfter = SIndex;
438 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
439 DebugLoc dl = Loc->getDebugLoc();
440 const MachineOperand &PMO = Loc->getOperand(0);
441 unsigned PReg = PMO.getReg();
// Undef operands get UINT_MAX so they never satisfy the ascending-order check.
442 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
443 : getARMRegisterNumbering(PReg);
445 unsigned Limit = ~0U;
447 // vldm / vstm limit are 32 for S variants, 16 for D variants.
// Greedily extend the run of mergeable memops starting after SIndex.
465 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
466 int NewOffset = MemOps[i].Offset;
467 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
468 unsigned Reg = MO.getReg();
469 unsigned RegNum = MO.isUndef() ? UINT_MAX
470 : getARMRegisterNumbering(Reg);
471 // Register numbers must be in ascending order. For VFP / NEON load and
472 // store multiples, the registers must also be consecutive and within the
473 // limit on the number of registers per instruction.
474 if (Reg != ARM::SP &&
475 NewOffset == Offset + (int)Size &&
476 ((isNotVFP && RegNum > PRegNum) ||
477 ((Count < Limit) && RegNum == PRegNum+1))) {
482 // Can't merge this in. Try merge the earlier ones first.
483 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
484 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
// Recurse to merge the remainder of the queue starting at i.
485 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
490 if (MemOps[i].Position > MemOps[insertAfter].Position)
// Final merge of the whole remaining run.
494 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
495 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
496 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
500 static bool definesCPSR(MachineInstr *MI) {
501 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
502 const MachineOperand &MO = MI->getOperand(i);
505 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
506 // If the instruction has live CPSR def, then it's not safe to fold it
507 // into load / store.
514 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
515 unsigned Bytes, unsigned Limit,
516 ARMCC::CondCodes Pred, unsigned PredReg) {
517 unsigned MyPredReg = 0;
521 bool CheckCPSRDef = false;
522 switch (MI->getOpcode()) {
523 default: return false;
532 // Make sure the offset fits in 8 bits.
533 if (Bytes == 0 || (Limit && Bytes >= Limit))
536 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
537 if (!(MI->getOperand(0).getReg() == Base &&
538 MI->getOperand(1).getReg() == Base &&
539 (MI->getOperand(2).getImm()*Scale) == Bytes &&
540 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
541 MyPredReg == PredReg))
544 return CheckCPSRDef ? !definesCPSR(MI) : true;
547 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
548 unsigned Bytes, unsigned Limit,
549 ARMCC::CondCodes Pred, unsigned PredReg) {
550 unsigned MyPredReg = 0;
554 bool CheckCPSRDef = false;
555 switch (MI->getOpcode()) {
556 default: return false;
565 if (Bytes == 0 || (Limit && Bytes >= Limit))
566 // Make sure the offset fits in 8 bits.
569 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
570 if (!(MI->getOperand(0).getReg() == Base &&
571 MI->getOperand(1).getReg() == Base &&
572 (MI->getOperand(2).getImm()*Scale) == Bytes &&
573 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
574 MyPredReg == PredReg))
577 return CheckCPSRDef ? !definesCPSR(MI) : true;
580 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
581 switch (MI->getOpcode()) {
609 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
612 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
616 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
617 ARM_AM::AMSubMode Mode) {
619 default: llvm_unreachable("Unhandled opcode!");
625 default: llvm_unreachable("Unhandled submode!");
626 case ARM_AM::ia: return ARM::LDMIA_UPD;
627 case ARM_AM::ib: return ARM::LDMIB_UPD;
628 case ARM_AM::da: return ARM::LDMDA_UPD;
629 case ARM_AM::db: return ARM::LDMDB_UPD;
636 default: llvm_unreachable("Unhandled submode!");
637 case ARM_AM::ia: return ARM::STMIA_UPD;
638 case ARM_AM::ib: return ARM::STMIB_UPD;
639 case ARM_AM::da: return ARM::STMDA_UPD;
640 case ARM_AM::db: return ARM::STMDB_UPD;
645 default: llvm_unreachable("Unhandled submode!");
646 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
647 case ARM_AM::db: return ARM::t2LDMDB_UPD;
652 default: llvm_unreachable("Unhandled submode!");
653 case ARM_AM::ia: return ARM::t2STMIA_UPD;
654 case ARM_AM::db: return ARM::t2STMDB_UPD;
658 default: llvm_unreachable("Unhandled submode!");
659 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
660 case ARM_AM::db: return ARM::VLDMSDB_UPD;
664 default: llvm_unreachable("Unhandled submode!");
665 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
666 case ARM_AM::db: return ARM::VLDMDDB_UPD;
670 default: llvm_unreachable("Unhandled submode!");
671 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
672 case ARM_AM::db: return ARM::VSTMSDB_UPD;
676 default: llvm_unreachable("Unhandled submode!");
677 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
678 case ARM_AM::db: return ARM::VSTMDDB_UPD;
// NOTE(review): truncated listing — `DoMerge = true;` assignments, the
// erase-the-add/sub bookkeeping, the `if (!DoMerge) return false;` guard, and
// the trailing erase/return were dropped. Reproduced verbatim; restore from
// upstream before compiling.
683 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
684 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
686 /// stmia rn, <ra, rb, rc>
687 /// rn := rn + 4 * 3;
689 /// stmia rn!, <ra, rb, rc>
691 /// rn := rn - 4 * 3;
692 /// ldmia rn, <ra, rb, rc>
694 /// ldmdb rn!, <ra, rb, rc>
695 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
696 MachineBasicBlock::iterator MBBI,
698 MachineBasicBlock::iterator &I) {
699 MachineInstr *MI = MBBI;
700 unsigned Base = MI->getOperand(0).getReg();
701 bool BaseKill = MI->getOperand(0).isKill();
702 unsigned Bytes = getLSMultipleTransferSize(MI);
703 unsigned PredReg = 0;
704 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
705 int Opcode = MI->getOpcode();
706 DebugLoc dl = MI->getDebugLoc();
708 // Can't use an updating ld/st if the base register is also a dest
709 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
710 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
711 if (MI->getOperand(i).getReg() == Base)
714 bool DoMerge = false;
715 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
717 // Try merging with the previous instruction.
718 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
719 if (MBBI != BeginMBBI) {
720 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
// Skip over debug values so they don't block the match.
721 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
723 if (Mode == ARM_AM::ia &&
724 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
727 } else if (Mode == ARM_AM::ib &&
728 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
736 // Try merging with the next instruction.
737 MachineBasicBlock::iterator EndMBBI = MBB.end();
738 if (!DoMerge && MBBI != EndMBBI) {
739 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
740 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
742 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
743 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
745 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
746 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
// Emit the base-updating replacement and copy the transferred registers over.
761 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
762 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
763 .addReg(Base, getDefRegState(true)) // WB base register
764 .addReg(Base, getKillRegState(BaseKill))
765 .addImm(Pred).addReg(PredReg);
767 // Transfer the rest of operands.
768 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
769 MIB.addOperand(MI->getOperand(OpNum));
771 // Transfer memoperands.
772 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
778 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
779 ARM_AM::AddrOpc Mode) {
782 return ARM::LDR_PRE_IMM;
784 return ARM::STR_PRE_IMM;
786 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
788 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
790 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
792 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
795 return ARM::t2LDR_PRE;
798 return ARM::t2STR_PRE;
799 default: llvm_unreachable("Unhandled opcode!");
803 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
804 ARM_AM::AddrOpc Mode) {
807 return ARM::LDR_POST_IMM;
809 return ARM::STR_POST_IMM;
811 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
813 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
815 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
817 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
820 return ARM::t2LDR_POST;
823 return ARM::t2STR_POST;
824 default: llvm_unreachable("Unhandled opcode!");
// NOTE(review): truncated listing — early `return false;` statements,
// `DoMerge = true;` assignments and erasures of the matched add/sub, the
// `if (!DoMerge) return false;` guard, several `else`/closing braces, and the
// final `MBB.erase(MBBI); return true;` were dropped. Reproduced verbatim;
// restore from upstream before compiling.
828 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
829 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
830 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
831 MachineBasicBlock::iterator MBBI,
832 const TargetInstrInfo *TII,
834 MachineBasicBlock::iterator &I) {
835 MachineInstr *MI = MBBI;
836 unsigned Base = MI->getOperand(1).getReg();
837 bool BaseKill = MI->getOperand(1).isKill();
838 unsigned Bytes = getLSMultipleTransferSize(MI);
839 int Opcode = MI->getOpcode();
840 DebugLoc dl = MI->getDebugLoc();
841 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
842 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
843 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
// Only zero-offset forms can absorb a base update.
844 if (isi32Load(Opcode) || isi32Store(Opcode))
845 if (MI->getOperand(2).getImm() != 0)
847 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
850 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
851 // Can't do the merge if the destination register is the same as the would-be
852 // writeback register.
853 if (isLd && MI->getOperand(0).getReg() == Base)
856 unsigned PredReg = 0;
857 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
858 bool DoMerge = false;
859 ARM_AM::AddrOpc AddSub = ARM_AM::add;
861 // AM2 - 12 bits, thumb2 - 8 bits.
862 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
864 // Try merging with the previous instruction.
865 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
866 if (MBBI != BeginMBBI) {
867 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
868 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
870 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
872 AddSub = ARM_AM::sub;
874 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
878 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
883 // Try merging with the next instruction.
884 MachineBasicBlock::iterator EndMBBI = MBB.end();
885 if (!DoMerge && MBBI != EndMBBI) {
886 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
887 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
890 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
892 AddSub = ARM_AM::sub;
893 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
897 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
// VFP case: replace with a one-register updating VLDM/VSTM.
910 // VLDM[SD}_UPD, VSTM[SD]_UPD
911 // (There are no base-updating versions of VLDR/VSTR instructions, but the
912 // updating load/store-multiple instructions can be used with only one
914 MachineOperand &MO = MI->getOperand(0);
915 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
916 .addReg(Base, getDefRegState(true)) // WB base register
917 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
918 .addImm(Pred).addReg(PredReg)
919 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
920 getKillRegState(MO.isKill())));
// Integer load case: pre- vs post-indexed forms take different operands.
924 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
925 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
926 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
927 .addReg(Base, RegState::Define)
928 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
930 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
931 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
932 .addReg(Base, RegState::Define)
933 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
936 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
937 // t2LDR_PRE, t2LDR_POST
938 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
939 .addReg(Base, RegState::Define)
940 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
// Integer store case.
943 MachineOperand &MO = MI->getOperand(0);
944 // FIXME: post-indexed stores use am2offset_imm, which still encodes
945 // the vestigal zero-reg offset register. When that's fixed, this clause
946 // can be removed entirely.
947 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
948 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
950 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
951 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
952 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
954 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
955 // t2STR_PRE, t2STR_POST
956 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
957 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
958 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
966 /// isMemoryOp - Returns true if instruction is a memory operation that this
967 /// pass is capable of operating on.
968 static bool isMemoryOp(const MachineInstr *MI) {
969 // When no memory operands are present, conservatively assume unaligned,
970 // volatile, unfoldable.
971 if (!MI->hasOneMemOperand())
974 const MachineMemOperand *MMO = *MI->memoperands_begin();
976 // Don't touch volatile memory accesses - we may be changing their order.
977 if (MMO->isVolatile())
980 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
982 if (MMO->getAlignment() < 4)
985 // str <undef> could probably be eliminated entirely, but for now we just want
986 // to avoid making a mess of it.
987 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
988 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
989 MI->getOperand(0).isUndef())
992 // Likewise don't mess with references to undefined addresses.
993 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
994 MI->getOperand(1).isUndef())
997 int Opcode = MI->getOpcode();
1002 return MI->getOperand(1).isReg();
1005 return MI->getOperand(1).isReg();
1012 return MI->getOperand(1).isReg();
1017 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1018 /// op that is being merged.
1019 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1020 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1021 unsigned Position = MemOps[0].Position;
1022 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1023 if (MemOps[i].Position < Position) {
1024 Position = MemOps[i].Position;
1025 Loc = MemOps[i].MBBI;
1029 if (Loc != MBB.begin())
1030 RS->forward(prior(Loc));
1033 static int getMemoryOpOffset(const MachineInstr *MI) {
1034 int Opcode = MI->getOpcode();
1035 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1036 unsigned NumOperands = MI->getDesc().getNumOperands();
1037 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1039 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1040 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1041 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1042 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1045 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1046 : ARM_AM::getAM5Offset(OffField) * 4;
1048 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1051 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1057 static void InsertLDR_STR(MachineBasicBlock &MBB,
1058 MachineBasicBlock::iterator &MBBI,
1059 int Offset, bool isDef,
1060 DebugLoc dl, unsigned NewOpc,
1061 unsigned Reg, bool RegDeadKill, bool RegUndef,
1062 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1063 bool OffKill, bool OffUndef,
1064 ARMCC::CondCodes Pred, unsigned PredReg,
1065 const TargetInstrInfo *TII, bool isT2) {
1067 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1069 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1070 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1071 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1073 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1075 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1076 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1077 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
// NOTE(review): truncated listing — the `return false;` early exits, the
// store-side `.addReg(EvenReg, ...)` / `.addReg(OddReg, ...)` operand lines,
// statistic updates, `MBB.erase(MI)`/`MBBI = NewBBI` epilogue, and closing
// braces were dropped. Reproduced verbatim; restore from upstream before
// compiling.
// Purpose: rewrite LDRD/STRD whose register pair is invalid (odd/even order,
// non-consecutive, or hits Cortex-M3 errata 602117) into either an LDM/STM
// (ascending regs, zero offset) or two single loads/stores.
1081 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1082 MachineBasicBlock::iterator &MBBI) {
1083 MachineInstr *MI = &*MBBI;
1084 unsigned Opcode = MI->getOpcode();
1085 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1086 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1087 const MachineOperand &BaseOp = MI->getOperand(2);
1088 unsigned BaseReg = BaseOp.getReg();
1089 unsigned EvenReg = MI->getOperand(0).getReg();
1090 unsigned OddReg = MI->getOperand(1).getReg();
1091 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1092 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1093 // ARM errata 602117: LDRD with base in list may result in incorrect base
1094 // register when interrupted or faulted.
1095 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
// A valid even/odd consecutive pair (and no errata) needs no fixing.
1096 if (!Errata602117 &&
1097 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1100 MachineBasicBlock::iterator NewBBI = MBBI;
1101 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1102 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1103 bool EvenDeadKill = isLd ?
1104 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1105 bool EvenUndef = MI->getOperand(0).isUndef();
1106 bool OddDeadKill = isLd ?
1107 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1108 bool OddUndef = MI->getOperand(1).isUndef();
1109 bool BaseKill = BaseOp.isKill();
1110 bool BaseUndef = BaseOp.isUndef();
1111 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1112 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1113 int OffImm = getMemoryOpOffset(MI);
1114 unsigned PredReg = 0;
1115 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1117 if (OddRegNum > EvenRegNum && OffImm == 0) {
1118 // Ascending register numbers and no offset. It's safe to change it to a
1120 unsigned NewOpc = (isLd)
1121 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1122 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1124 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1125 .addReg(BaseReg, getKillRegState(BaseKill))
1126 .addImm(Pred).addReg(PredReg)
1127 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1128 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1131 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1132 .addReg(BaseReg, getKillRegState(BaseKill))
1133 .addImm(Pred).addReg(PredReg)
1135 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1137 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1140 NewBBI = llvm::prior(MBBI);
1142 // Split into two instructions.
1143 unsigned NewOpc = (isLd)
1144 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1145 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1146 DebugLoc dl = MBBI->getDebugLoc();
1147 // If this is a load and base register is killed, it may have been
1148 // re-defed by the load, make sure the first load does not clobber it.
1150 (BaseKill || OffKill) &&
1151 (TRI->regsOverlap(EvenReg, BaseReg))) {
1152 assert(!TRI->regsOverlap(OddReg, BaseReg));
// Emit the odd-register transfer first so the base survives for the second.
1153 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1154 OddReg, OddDeadKill, false,
1155 BaseReg, false, BaseUndef, false, OffUndef,
1156 Pred, PredReg, TII, isT2);
1157 NewBBI = llvm::prior(MBBI);
1158 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1159 EvenReg, EvenDeadKill, false,
1160 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1161 Pred, PredReg, TII, isT2);
1163 if (OddReg == EvenReg && EvenDeadKill) {
1164 // If the two source operands are the same, the kill marker is
1165 // probably on the first one. e.g.
1166 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1167 EvenDeadKill = false;
1170 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1171 EvenReg, EvenDeadKill, EvenUndef,
1172 BaseReg, false, BaseUndef, false, OffUndef,
1173 Pred, PredReg, TII, isT2);
1174 NewBBI = llvm::prior(MBBI);
1175 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1176 OddReg, OddDeadKill, OddUndef,
1177 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1178 Pred, PredReg, TII, isT2);
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;   // LDM/STM instructions formed; decides return value.
  unsigned NumMemOps = 0;   // Mem ops queued in the chain currently being built.
  unsigned CurrBase = 0;    // Base register of the current chain (0 = no chain).
  unsigned CurrSize = 0;    // Per-op transfer size of the current chain.
  ARMCC::CondCodes CurrPred = ARMCC::AL;  // Predicate shared by the chain's ops.
  unsigned CurrPredReg = 0; // Predicate flags register of the current chain.
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges; // LDM/STM instrs produced.

  // The scavenger tracks liveness so a scratch base register can be found
  // below via FindUnusedReg.
  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
    // Rewrite any illegal LDRD/STRD register pairing at this position first.
    if (FixInvalidRegPairOp(MBB, MBBI))
    bool Advance = false;   // True once MBBI may advance to the next instr.
    bool TryMerge = false;  // True when the queued chain should be merged now.
    bool Clobber = false;   // True when a load redefines its own base register.

    bool isMemOp = isMemoryOp(MBBI);
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      // The transferred register of a load is a def, so it is never a kill.
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        // Same opcode, base and predicate: candidate to extend the chain.
        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            // Larger than every queued offset: append at the back.
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
            // Otherwise insert at the position that keeps offsets sorted.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!

    // Debug values neither extend nor break a chain.
    if (MBBI->isDebugValue()) {
        // Reach the end of the block, try merging the memory instructions.
    } else if (Advance) {
      // Reach the end of the block, try merging the memory instructions.

      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge the queued ops into one or more LDM/STM instructions.
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          RS->forward(prior(MBBI));

      // Reset chain state so the next mem op starts a fresh chain.
      CurrPred = ARMCC::AL;

    // If iterator hasn't been advanced and this is not a memory op, skip it.
    // It can't start a new chain anyway.
    if (!Advance && !isMemOp && MBBI != E) {

  return NumMerges > 0;
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   ldmfd sp!, {..., lr}
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  // Only act when the block ends in one of the recognized return forms.
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    // The instruction before the return must be a writeback multiple-load.
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // Its last operand must be LR for the pop-into-pc rewrite to apply.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      // Only the IA (increment-after) form is expected to reach here.
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
             Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      // Carry over the return's implicit operands onto the merged load.
      PrevMI->copyImplicitOps(&*MBBI);
/// runOnMachineFunction - Entry point: run the post-RA load/store merging
/// over every basic block of Fn, then merge returns into LDM where legal.
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  // Cache the target hooks used throughout the pass.
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  // The scavenger supplies scratch base registers during LDM/STM formation.
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    // Loading the return address directly into pc (MergeReturnIntoLDM) is
    // only performed on v5T and later subtargets.
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

  const TargetData *TD;          // Data layout (for i64 ABI alignment query).
  const TargetInstrInfo *TII;    // Target instruction info.
  const TargetRegisterInfo *TRI; // Target register info.
  const ARMSubtarget *STI;       // Subtarget feature queries (v5TE, v6, ...).
  MachineRegisterInfo *MRI;      // Vreg classes and register-allocation hints.
  MachineFunction *MF;           // Function currently being processed.

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM pre- register allocation load / store optimization pass";

  // Check whether Op0/Op1 can be combined into a single LDRD/STRD; on
  // success the new opcode and operands are produced via the out-params.
  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
  // Move the loads/stores in Ops (all using base register Base) next to
  // each other; MI2LocMap gives each instruction's ordinal position.
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVector<MachineInstr*, 4> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
char ARMPreAllocLoadStoreOpt::ID = 0;
/// runOnMachineFunction - Entry point: reschedule loads/stores within each
/// basic block of Fn independently.
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  // Cache target hooks used by the rescheduling helpers.
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    Modified |= RescheduleLoadStoreInstrs(MFI);
/// IsSafeAndProfitableToMove - Scan the instructions in [I, E) for hazards
/// (calls, terminators, side effects, stores when moving loads, defs of the
/// base register) and estimate the register-pressure cost of moving the ops
/// in MemOps together.  MemRegs holds the registers transferred by those ops.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  SmallSet<unsigned, 4> AddedRegPressure;
    // Ops that are part of the group (and debug values) are transparent.
    if (I->isDebugValue() || MemOps.count(&*I))
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
    // Moving loads across an intervening store is unsafe.
    if (isLd && I->mayStore())
      // It's not safe to move the first 'str' down.
      // str r4, [r0, #+4]
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      unsigned Reg = MO.getReg();
      // A redefinition of the base register breaks the group.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
      // Registers not already live across the group add pressure.
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
  return AddedRegPressure.size() <= MemRegs.size() * 2;
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  // Total number of memory references the combined instruction will carry.
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  // The memref array is owned by the MachineFunction's allocator.
  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  // Copy Op0's memrefs first, then append Op1's after them.
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  // Map the immediate-offset load/store opcode to its doubleword pair form.
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
  else if (Opcode == ARM::STRi12)
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // Volatile accesses and accesses without a known value must not be paired.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
    // The scaled immediate is limited to 8 bits ((1 << 8) * Scale).
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
    // Negative offsets are encoded as a separate subtract flag.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
      AddSub = ARM_AM::sub;
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
    // Pack addsub flag and magnitude into the addrmode3 immediate.
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);

  // Produce the remaining out-parameters from the original pair.
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  // LDRD/STRD need two distinct transfer registers.
  if (EvenReg == OddReg)
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
/// OffsetCompare - Strict weak ordering that sorts memory ops by decreasing
/// base-relative offset (reverse offset order).
struct OffsetCompare {
  bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    // Distinct instructions in the set are expected to have distinct offsets.
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;   // Smallest location seen for the current run.
    unsigned LastLoc = 0;      // Largest location seen for the current run.
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk Ops from the back (lowest offset first, given the reverse sort)
    // and grow a run of same-opcode ops with consecutive offsets.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
      if (Loc >= LastLoc) {
          = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      // Ops in one run must map to the same load/store-multiple opcode.
      if (LastOpcode && LSMOpcode != LastOpcode)

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
        // Offsets must be consecutive and transfer sizes uniform.
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
      LastOffset = Offset;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.

      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
        for (unsigned i = 0; i != NumMove; ++i)

        // This is the new location for the loads / stores.
        // Loads gather at the first op's position, stores at the last op's.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        unsigned NewOpc = 0;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          // Constrain both transfer registers to the class LDRD/STRD needs.
          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
            .addReg(EvenReg, RegState::Define)
            .addReg(OddReg, RegState::Define)
          // FIXME: We're converting from LDRi12 to an insn that still
          // uses addrmode2, so we need an explicit offset reg. It should
          // always by reg0 since we're transforming LDRi12s.
          MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
          concatenateMemOperands(MIB, Op0, Op1);
          DEBUG(dbgs() << "Formed " << *MIB << "\n");
          // Store pair: same formation, but the registers are sources.
          MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
          // FIXME: We're converting from LDRi12 to an insn that still
          // uses addrmode2, so we need an explicit offset reg. It should
          // always by reg0 since we're transforming STRi12s.
          MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
          concatenateMemOperands(MIB, Op0, Op1);
          DEBUG(dbgs() << "Formed " << *MIB << "\n");

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
          // Move each op to the new insertion point in program order.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            MBB->splice(InsertPos, MBB, Op);

          NumLdStMoved += NumMove;
/// RescheduleLoadStoreInstrs - Collect the unpredicated loads and stores of
/// this block per base register, assign each instruction an ordinal location,
/// and invoke RescheduleOps on every base's group of loads and of stores.
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;  // Instr -> ordinal location.
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; // base->loads
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; // base->stores
  SmallVector<unsigned, 4> LdBases;  // Bases with queued loads, in seen order.
  SmallVector<unsigned, 4> StBases;  // Bases with queued stores, in seen order.

  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  for (; MBBI != E; ++MBBI) {
    MachineInstr *MI = MBBI;
    if (MI->isCall() || MI->isTerminator()) {
      // Stop at barriers.
    // Debug values receive no location number.
    if (!MI->isDebugValue())
      MI2LocMap[MI] = ++Loc;

    if (!isMemoryOp(MI))
    unsigned PredReg = 0;
    // Only unpredicated (AL) memory ops are considered for rescheduling.
    if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)

    int Opc = MI->getOpcode();
    bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
    unsigned Base = MI->getOperand(1).getReg();
    int Offset = getMemoryOpOffset(MI);

    bool StopHere = false;
      // Loads: group by base register, rejecting duplicate offsets.
      DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
        Base2LdsMap.find(Base);
      if (BI != Base2LdsMap.end()) {
        for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
          if (Offset == getMemoryOpOffset(BI->second[i])) {
          BI->second.push_back(MI);
        // First load seen for this base register.
        SmallVector<MachineInstr*, 4> MIs;
        Base2LdsMap[Base] = MIs;
        LdBases.push_back(Base);
      // Stores: same bookkeeping as the loads above.
      DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
        Base2StsMap.find(Base);
      if (BI != Base2StsMap.end()) {
        for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
          if (Offset == getMemoryOpOffset(BI->second[i])) {
          BI->second.push_back(MI);
        SmallVector<MachineInstr*, 4> MIs;
        Base2StsMap[Base] = MIs;
        StBases.push_back(Base);

      // Found a duplicate (a base+offset combination that's seen earlier).

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);

    // Reset per-region state before continuing the scan.
    Base2LdsMap.clear();
    Base2StsMap.clear();
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.  PreAlloc selects the pre-register-allocation
/// rescheduling pass; otherwise the post-allocation LDM/STM merger is created.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();