//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
/// ARMAllocLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
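///
/// For example (an illustrative sketch written for this comment, not output
/// captured from the pass): three word loads off the same base register at
/// consecutive offsets,
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// can be rewritten as the single load-multiple
///   ldmia r4, {r0, r1, r2}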
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}
/// MergeOps - Create and insert an LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
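//
// A minimal sketch of the result (illustrative, not captured output): with
// Base = r0, Offset = 0, Opcode = ARM::STRi12, and Regs = {(r4, kill),
// (r5, kill)}, the merge emits
//   stmia r0, {r4, r5}
// in place of the two word stores at [r0] and [r0, #4].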
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right at its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
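//
// Illustrative sketch (not captured output): for queued word loads at
// offsets 0, 4, and 12 off the same base, the gap after offset 4 fails the
// NewOffset == Offset + Size check below, so offsets 0 and 4 are merged
// into one ldmia and the function recurses to start a new sequence at the
// load with offset 12.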
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
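// Example of what isMatchingDecrement above (and isMatchingIncrement below)
// recognizes, written out for illustration: with Base = r2 and Bytes = 12,
//   sub r2, r2, #12    matches the decrement form, and
//   add r2, r2, #12    matches the increment form,
// provided the predicate matches and the byte count is non-zero and within
// Limit.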
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
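///
/// For example (an illustrative sketch, not captured output):
///   ldr r3, [r0]
///   add r0, r0, #4
/// =>
///   ldr r3, [r0], #4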
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise, don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
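// Worked example of the decoding above (illustrative): the i12 opcodes
// carry a plain byte offset, so "ldr r0, [r1, #8]" yields 8; for the AM5
// VLDR/VSTR forms the field counts words, so an encoded offset of 2 with
// the sub bit set decodes to -(2 * 4) = -8 bytes.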
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
  }
  return false;
}
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
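//
// Chain-building sketch (illustrative, not captured output): scanning
// forward, same-base / same-opcode / same-predicate memops such as
//   ldr r0, [r7]
//   ldr r1, [r7, #4]
// are collected into MemOps in offset order; a base-register clobber, a
// non-matching instruction, or the end of the block triggers the merge
// attempt over whatever has been queued.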
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
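///
/// Illustrative sketch (not captured output): when unrelated code separates
/// two loads off the same base,
///   ldr r0, [r4]
///   mul r2, r2, r3
///   ldr r1, [r4, #4]
/// the pass moves the loads next to each other (when safe and profitable)
/// so later passes can turn them into ldrd or ldm forms.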
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
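// A sketch of a successful check (illustrative, with placeholder registers):
// two word loads
//   ldr rA, [r1]
//   ldr rB, [r1, #4]
// with distinct destination registers, a non-volatile 8-byte-aligned base,
// and an in-range offset pass the tests above and can be paired into a
// single ldrd by the caller, which also adds register-pair allocation hints.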
namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // It means the loads / stores are not to be merged. A new chain starts
        // from here.
        ++MBBI;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}