1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "arm-ldst-opt"
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMRegisterInfo.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Function.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/RegisterScavenging.h"
29 #include "llvm/CodeGen/SelectionDAGNodes.h"
30 #include "llvm/Target/TargetData.h"
31 #include "llvm/Target/TargetInstrInfo.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Target/TargetRegisterInfo.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/SmallSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/Statistic.h"
45 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
46 STATISTIC(NumSTMGened , "Number of stm instructions generated");
47 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
48 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
49 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
50 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
51 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
52 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
53 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
54 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
55 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
61 struct ARMLoadStoreOpt : public MachineFunctionPass {
63 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
65 const TargetInstrInfo *TII;
66 const TargetRegisterInfo *TRI;
67 const ARMSubtarget *STI;
72 virtual bool runOnMachineFunction(MachineFunction &Fn);
74 virtual const char *getPassName() const {
75 return "ARM load / store optimization pass";
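// MemOpQueueEntry - Records one load / store that is a candidate for
// merging: its immediate offset, the register transferred, whether that
// register is killed, and the instruction's position in the basic block.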
79 struct MemOpQueueEntry {
84 MachineBasicBlock::iterator MBBI;
86 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
87 MachineBasicBlock::iterator i)
88 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
90 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
91 typedef MemOpQueue::iterator MemOpQueueIter;
93 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
94 int Offset, unsigned Base, bool BaseKill, int Opcode,
95 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
96 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
97 void MergeOpsUpdate(MachineBasicBlock &MBB,
101 unsigned insertAfter,
106 ARMCC::CondCodes Pred,
110 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
111 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
112 int Opcode, unsigned Size,
113 ARMCC::CondCodes Pred, unsigned PredReg,
114 unsigned Scratch, MemOpQueue &MemOps,
115 SmallVector<MachineBasicBlock::iterator, 4> &Merges);
117 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
118 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
119 MachineBasicBlock::iterator &MBBI);
120 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
121 MachineBasicBlock::iterator MBBI,
122 const TargetInstrInfo *TII,
124 MachineBasicBlock::iterator &I);
125 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
126 MachineBasicBlock::iterator MBBI,
128 MachineBasicBlock::iterator &I);
129 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
130 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
132 char ARMLoadStoreOpt::ID = 0;
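// Map a single load / store opcode and addressing submode to the matching
// load / store multiple opcode. Returns 0 when no such opcode exists (e.g.
// VLDM/VSTM have no plain DB form, only the base-updating _UPD variants).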
135 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
137 default: llvm_unreachable("Unhandled opcode!");
141 default: llvm_unreachable("Unhandled submode!");
142 case ARM_AM::ia: return ARM::LDMIA;
143 case ARM_AM::da: return ARM::LDMDA;
144 case ARM_AM::db: return ARM::LDMDB;
145 case ARM_AM::ib: return ARM::LDMIB;
151 default: llvm_unreachable("Unhandled submode!");
152 case ARM_AM::ia: return ARM::STMIA;
153 case ARM_AM::da: return ARM::STMDA;
154 case ARM_AM::db: return ARM::STMDB;
155 case ARM_AM::ib: return ARM::STMIB;
162 default: llvm_unreachable("Unhandled submode!");
163 case ARM_AM::ia: return ARM::t2LDMIA;
164 case ARM_AM::db: return ARM::t2LDMDB;
171 default: llvm_unreachable("Unhandled submode!");
172 case ARM_AM::ia: return ARM::t2STMIA;
173 case ARM_AM::db: return ARM::t2STMDB;
179 default: llvm_unreachable("Unhandled submode!");
180 case ARM_AM::ia: return ARM::VLDMSIA;
181 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
187 default: llvm_unreachable("Unhandled submode!");
188 case ARM_AM::ia: return ARM::VSTMSIA;
189 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
195 default: llvm_unreachable("Unhandled submode!");
196 case ARM_AM::ia: return ARM::VLDMDIA;
197 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
203 default: llvm_unreachable("Unhandled submode!");
204 case ARM_AM::ia: return ARM::VSTMDIA;
205 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
216 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
218 default: llvm_unreachable("Unhandled opcode!");
224 case ARM::t2LDMIA_RET:
226 case ARM::t2LDMIA_UPD:
228 case ARM::t2STMIA_UPD:
230 case ARM::VLDMSIA_UPD:
232 case ARM::VSTMSIA_UPD:
234 case ARM::VLDMDIA_UPD:
236 case ARM::VSTMDIA_UPD:
250 case ARM::t2LDMDB_UPD:
252 case ARM::t2STMDB_UPD:
253 case ARM::VLDMSDB_UPD:
254 case ARM::VSTMSDB_UPD:
255 case ARM::VLDMDDB_UPD:
256 case ARM::VSTMDDB_UPD:
266 return ARM_AM::bad_am_submode;
269 } // end namespace ARM_AM
270 } // end namespace llvm
272 static bool isT2i32Load(unsigned Opc) {
273 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
276 static bool isi32Load(unsigned Opc) {
277 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
280 static bool isT2i32Store(unsigned Opc) {
281 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
284 static bool isi32Store(unsigned Opc) {
285 return Opc == ARM::STRi12 || isT2i32Store(Opc);
288 /// MergeOps - Create and insert a LDM or STM with Base as base register and
289 /// registers in Regs as the register operands that would be loaded / stored.
290 /// It returns true if the transformation is done.
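/// For example (illustrative), candidates at [Base] and [Base, #4] holding
/// r0 and r1 can be emitted as a single "ldmia Base, {r0, r1}".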
292 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
293 MachineBasicBlock::iterator MBBI,
294 int Offset, unsigned Base, bool BaseKill,
295 int Opcode, ARMCC::CondCodes Pred,
296 unsigned PredReg, unsigned Scratch, DebugLoc dl,
297 SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
298 // Only a single register to load / store. Don't bother.
299 unsigned NumRegs = Regs.size();
303 ARM_AM::AMSubMode Mode = ARM_AM::ia;
304 // VFP and Thumb2 do not support IB or DA modes.
305 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
306 bool haveIBAndDA = isNotVFP && !isThumb2;
if (Offset == 4 && haveIBAndDA)
  Mode = ARM_AM::ib;
else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
  Mode = ARM_AM::da;
else if (Offset == -4 * (int)NumRegs && isNotVFP)
  // VLDM/VSTM do not support DB mode without also updating the base reg.
  Mode = ARM_AM::db;
else if (Offset != 0) {
315 // Check if this is a supported opcode before we insert instructions to
316 // calculate a new base register.
317 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
// If the starting offset isn't zero, insert an MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
326 if (isi32Load(Opcode))
// If it is a load, then just use one of the destination registers
// as the new base.
329 NewBase = Regs[NumRegs-1].first;
// Otherwise, use the scratch register as the new base.
336 int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
338 BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
341 int ImmedOffset = isThumb2
342 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
343 if (ImmedOffset == -1)
344 // FIXME: Try t2ADDri12 or t2SUBri12?
345 return false; // Probably not worth it then.
347 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
348 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
349 .addImm(Pred).addReg(PredReg).addReg(0);
BaseKill = true;  // New base is always killed right after its use.
354 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
355 Opcode == ARM::VLDRD);
356 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
357 if (!Opcode) return false;
358 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
359 .addReg(Base, getKillRegState(BaseKill))
360 .addImm(Pred).addReg(PredReg);
361 for (unsigned i = 0; i != NumRegs; ++i)
362 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
363 | getKillRegState(Regs[i].second));
// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
// success.
370 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
372 unsigned memOpsBegin, unsigned memOpsEnd,
373 unsigned insertAfter, int Offset,
374 unsigned Base, bool BaseKill,
376 ARMCC::CondCodes Pred, unsigned PredReg,
379 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
// First calculate which of the registers should be killed by the merged
// instruction.
382 const unsigned insertPos = memOps[insertAfter].Position;
383 SmallSet<unsigned, 4> KilledRegs;
384 DenseMap<unsigned, unsigned> Killer;
385 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
386 if (i == memOpsBegin) {
391 if (memOps[i].Position < insertPos && memOps[i].isKill) {
392 unsigned Reg = memOps[i].Reg;
393 KilledRegs.insert(Reg);
398 SmallVector<std::pair<unsigned, bool>, 8> Regs;
399 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
400 unsigned Reg = memOps[i].Reg;
401 // If we are inserting the merged operation after an operation that
402 // uses the same register, make sure to transfer any kill flag.
403 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
404 Regs.push_back(std::make_pair(Reg, isKill));
407 // Try to do the merge.
408 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
410 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
411 Pred, PredReg, Scratch, dl, Regs))
414 // Merge succeeded, update records.
415 Merges.push_back(prior(Loc));
416 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
417 // Remove kill flags from any memops that come before insertPos.
418 if (Regs[i-memOpsBegin].second) {
419 unsigned Reg = Regs[i-memOpsBegin].first;
420 if (KilledRegs.count(Reg)) {
421 unsigned j = Killer[Reg];
422 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
423 assert(Idx >= 0 && "Cannot find killing operand");
424 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
425 memOps[j].isKill = false;
427 memOps[i].isKill = true;
429 MBB.erase(memOps[i].MBBI);
430 // Update this memop to refer to the merged instruction.
431 // We may need to move kill flags again.
432 memOps[i].Merged = true;
433 memOps[i].MBBI = Merges.back();
434 memOps[i].Position = insertPos;
438 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
439 /// load / store multiple instructions.
441 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
442 unsigned Base, int Opcode, unsigned Size,
443 ARMCC::CondCodes Pred, unsigned PredReg,
444 unsigned Scratch, MemOpQueue &MemOps,
445 SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
446 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
447 int Offset = MemOps[SIndex].Offset;
448 int SOffset = Offset;
449 unsigned insertAfter = SIndex;
450 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
451 DebugLoc dl = Loc->getDebugLoc();
452 const MachineOperand &PMO = Loc->getOperand(0);
453 unsigned PReg = PMO.getReg();
454 unsigned PRegNum = PMO.isUndef() ? UINT_MAX
455 : getARMRegisterNumbering(PReg);
457 unsigned Limit = ~0U;
// vldm / vstm limits are 32 for S variants, 16 for D variants.
477 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
478 int NewOffset = MemOps[i].Offset;
479 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
480 unsigned Reg = MO.getReg();
481 unsigned RegNum = MO.isUndef() ? UINT_MAX
482 : getARMRegisterNumbering(Reg);
483 // Register numbers must be in ascending order. For VFP / NEON load and
484 // store multiples, the registers must also be consecutive and within the
485 // limit on the number of registers per instruction.
486 if (Reg != ARM::SP &&
487 NewOffset == Offset + (int)Size &&
488 ((isNotVFP && RegNum > PRegNum) ||
489 ((Count < Limit) && RegNum == PRegNum+1))) {
// Can't merge this in. Try to merge the earlier ones first.
495 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
496 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
497 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
502 if (MemOps[i].Position > MemOps[insertAfter].Position)
506 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
507 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
508 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
512 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
513 unsigned Bytes, unsigned Limit,
514 ARMCC::CondCodes Pred, unsigned PredReg){
515 unsigned MyPredReg = 0;
518 if (MI->getOpcode() != ARM::t2SUBri &&
519 MI->getOpcode() != ARM::tSUBspi &&
520 MI->getOpcode() != ARM::SUBri)
523 // Make sure the offset fits in 8 bits.
524 if (Bytes == 0 || (Limit && Bytes >= Limit))
527 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
528 return (MI->getOperand(0).getReg() == Base &&
529 MI->getOperand(1).getReg() == Base &&
530 (MI->getOperand(2).getImm()*Scale) == Bytes &&
531 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
532 MyPredReg == PredReg);
535 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
536 unsigned Bytes, unsigned Limit,
537 ARMCC::CondCodes Pred, unsigned PredReg){
538 unsigned MyPredReg = 0;
541 if (MI->getOpcode() != ARM::t2ADDri &&
542 MI->getOpcode() != ARM::tADDspi &&
543 MI->getOpcode() != ARM::ADDri)
546 if (Bytes == 0 || (Limit && Bytes >= Limit))
547 // Make sure the offset fits in 8 bits.
550 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
551 return (MI->getOperand(0).getReg() == Base &&
552 MI->getOperand(1).getReg() == Base &&
553 (MI->getOperand(2).getImm()*Scale) == Bytes &&
554 llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
555 MyPredReg == PredReg);
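// Return the number of bytes transferred by a load / store instruction;
// for load / store multiples this is 4 (GPR and S registers) or 8 (D
// registers) bytes per register in the list.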
558 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
559 switch (MI->getOpcode()) {
587 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
590 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
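// Map a load / store multiple opcode to its base-updating (writeback) form
// for the given addressing submode.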
594 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
595 ARM_AM::AMSubMode Mode) {
597 default: llvm_unreachable("Unhandled opcode!");
603 default: llvm_unreachable("Unhandled submode!");
604 case ARM_AM::ia: return ARM::LDMIA_UPD;
605 case ARM_AM::ib: return ARM::LDMIB_UPD;
606 case ARM_AM::da: return ARM::LDMDA_UPD;
607 case ARM_AM::db: return ARM::LDMDB_UPD;
615 default: llvm_unreachable("Unhandled submode!");
616 case ARM_AM::ia: return ARM::STMIA_UPD;
617 case ARM_AM::ib: return ARM::STMIB_UPD;
618 case ARM_AM::da: return ARM::STMDA_UPD;
619 case ARM_AM::db: return ARM::STMDB_UPD;
625 default: llvm_unreachable("Unhandled submode!");
626 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
627 case ARM_AM::db: return ARM::t2LDMDB_UPD;
633 default: llvm_unreachable("Unhandled submode!");
634 case ARM_AM::ia: return ARM::t2STMIA_UPD;
635 case ARM_AM::db: return ARM::t2STMDB_UPD;
640 default: llvm_unreachable("Unhandled submode!");
641 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
642 case ARM_AM::db: return ARM::VLDMSDB_UPD;
647 default: llvm_unreachable("Unhandled submode!");
648 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
649 case ARM_AM::db: return ARM::VLDMDDB_UPD;
654 default: llvm_unreachable("Unhandled submode!");
655 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
656 case ARM_AM::db: return ARM::VSTMSDB_UPD;
661 default: llvm_unreachable("Unhandled submode!");
662 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
663 case ARM_AM::db: return ARM::VSTMDDB_UPD;
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
672 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
674 /// stmia rn, <ra, rb, rc>
675 /// rn := rn + 4 * 3;
677 /// stmia rn!, <ra, rb, rc>
679 /// rn := rn - 4 * 3;
680 /// ldmia rn, <ra, rb, rc>
682 /// ldmdb rn!, <ra, rb, rc>
683 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
684 MachineBasicBlock::iterator MBBI,
686 MachineBasicBlock::iterator &I) {
687 MachineInstr *MI = MBBI;
688 unsigned Base = MI->getOperand(0).getReg();
689 bool BaseKill = MI->getOperand(0).isKill();
690 unsigned Bytes = getLSMultipleTransferSize(MI);
691 unsigned PredReg = 0;
692 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
693 int Opcode = MI->getOpcode();
694 DebugLoc dl = MI->getDebugLoc();
696 // Can't use an updating ld/st if the base register is also a dest
697 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
698 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
699 if (MI->getOperand(i).getReg() == Base)
702 bool DoMerge = false;
703 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
705 // Try merging with the previous instruction.
706 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
707 if (MBBI != BeginMBBI) {
708 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
709 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
711 if (Mode == ARM_AM::ia &&
712 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
715 } else if (Mode == ARM_AM::ib &&
716 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
724 // Try merging with the next instruction.
725 MachineBasicBlock::iterator EndMBBI = MBB.end();
726 if (!DoMerge && MBBI != EndMBBI) {
727 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
728 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
730 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
731 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
733 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
734 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
749 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
750 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
751 .addReg(Base, getDefRegState(true)) // WB base register
752 .addReg(Base, getKillRegState(BaseKill))
753 .addImm(Pred).addReg(PredReg);
// Transfer the rest of the operands.
756 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
757 MIB.addOperand(MI->getOperand(OpNum));
759 // Transfer memoperands.
760 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
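// Map an immediate-offset load / store opcode to its pre-indexed form. There
// are no pre-indexed VLDR/VSTR instructions, so those map to single-register
// updating VLDM/VSTM forms instead.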
766 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
767 ARM_AM::AddrOpc Mode) {
770 return ARM::LDR_PRE_IMM;
772 return ARM::STR_PRE_IMM;
774 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
776 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
778 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
780 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
783 return ARM::t2LDR_PRE;
786 return ARM::t2STR_PRE;
787 default: llvm_unreachable("Unhandled opcode!");
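// Map an immediate-offset load / store opcode to its post-indexed form; as
// above, VLDR/VSTR map to single-register updating VLDM/VSTM forms.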
792 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
793 ARM_AM::AddrOpc Mode) {
796 return ARM::LDR_POST_IMM;
798 return ARM::STR_POST_IMM;
800 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
802 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
804 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
806 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
809 return ARM::t2LDR_POST;
812 return ARM::t2STR_POST;
813 default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/VLDR{D|S}/VSTR{D|S} op when possible:
820 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
821 MachineBasicBlock::iterator MBBI,
822 const TargetInstrInfo *TII,
824 MachineBasicBlock::iterator &I) {
825 MachineInstr *MI = MBBI;
826 unsigned Base = MI->getOperand(1).getReg();
827 bool BaseKill = MI->getOperand(1).isKill();
828 unsigned Bytes = getLSMultipleTransferSize(MI);
829 int Opcode = MI->getOpcode();
830 DebugLoc dl = MI->getDebugLoc();
831 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
832 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
833 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
834 if (isi32Load(Opcode) || isi32Store(Opcode))
835 if (MI->getOperand(2).getImm() != 0)
837 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
840 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
841 // Can't do the merge if the destination register is the same as the would-be
842 // writeback register.
843 if (isLd && MI->getOperand(0).getReg() == Base)
846 unsigned PredReg = 0;
847 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
848 bool DoMerge = false;
849 ARM_AM::AddrOpc AddSub = ARM_AM::add;
851 // AM2 - 12 bits, thumb2 - 8 bits.
852 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
854 // Try merging with the previous instruction.
855 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
856 if (MBBI != BeginMBBI) {
857 MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
858 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
860 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
862 AddSub = ARM_AM::sub;
864 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
868 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
873 // Try merging with the next instruction.
874 MachineBasicBlock::iterator EndMBBI = MBB.end();
875 if (!DoMerge && MBBI != EndMBBI) {
876 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
877 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
880 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
882 AddSub = ARM_AM::sub;
883 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
887 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
// VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
// updating load/store-multiple instructions can be used with only one
// register.)
904 MachineOperand &MO = MI->getOperand(0);
905 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
906 .addReg(Base, getDefRegState(true)) // WB base register
907 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
908 .addImm(Pred).addReg(PredReg)
909 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
910 getKillRegState(MO.isKill())));
914 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
915 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
916 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
917 .addReg(Base, RegState::Define)
918 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
920 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
921 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
922 .addReg(Base, RegState::Define)
923 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
926 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
927 // t2LDR_PRE, t2LDR_POST
928 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
929 .addReg(Base, RegState::Define)
930 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
933 MachineOperand &MO = MI->getOperand(0);
// FIXME: post-indexed stores use am2offset_imm, which still encodes
// the vestigial zero-reg offset register. When that's fixed, this clause
// can be removed entirely.
937 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
938 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
940 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
941 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
942 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
944 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
945 // t2STR_PRE, t2STR_POST
946 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
947 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
948 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
956 /// isMemoryOp - Returns true if instruction is a memory operation that this
957 /// pass is capable of operating on.
958 static bool isMemoryOp(const MachineInstr *MI) {
959 // When no memory operands are present, conservatively assume unaligned,
960 // volatile, unfoldable.
961 if (!MI->hasOneMemOperand())
964 const MachineMemOperand *MMO = *MI->memoperands_begin();
966 // Don't touch volatile memory accesses - we may be changing their order.
967 if (MMO->isVolatile())
// Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
// not.
972 if (MMO->getAlignment() < 4)
975 // str <undef> could probably be eliminated entirely, but for now we just want
976 // to avoid making a mess of it.
977 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
978 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
979 MI->getOperand(0).isUndef())
982 // Likewise don't mess with references to undefined addresses.
983 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
984 MI->getOperand(1).isUndef())
987 int Opcode = MI->getOpcode();
992 return MI->getOperand(1).isReg();
995 return MI->getOperand(1).isReg();
1002 return MI->getOperand(1).isReg();
1007 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1008 /// op that is being merged.
1009 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1010 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1011 unsigned Position = MemOps[0].Position;
1012 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1013 if (MemOps[i].Position < Position) {
1014 Position = MemOps[i].Position;
1015 Loc = MemOps[i].MBBI;
1019 if (Loc != MBB.begin())
1020 RS->forward(prior(Loc));
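// Return the signed byte offset encoded in a load / store's immediate
// operand, decoding the AM3 / AM5 encodings where necessary.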
1023 static int getMemoryOpOffset(const MachineInstr *MI) {
1024 int Opcode = MI->getOpcode();
1025 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1026 unsigned NumOperands = MI->getDesc().getNumOperands();
1027 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1029 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1030 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1031 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1032 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1035 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1036 : ARM_AM::getAM5Offset(OffField) * 4;
1038 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1041 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
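// Emit a single load or store with the given register / base / offset and
// kill / undef state; used when an LDRD or STRD has to be split in two.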
1047 static void InsertLDR_STR(MachineBasicBlock &MBB,
1048 MachineBasicBlock::iterator &MBBI,
1049 int Offset, bool isDef,
1050 DebugLoc dl, unsigned NewOpc,
1051 unsigned Reg, bool RegDeadKill, bool RegUndef,
1052 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1053 bool OffKill, bool OffUndef,
1054 ARMCC::CondCodes Pred, unsigned PredReg,
1055 const TargetInstrInfo *TII, bool isT2) {
1057 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1059 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1060 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1061 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1063 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1065 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1066 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1067 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1071 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1072 MachineBasicBlock::iterator &MBBI) {
1073 MachineInstr *MI = &*MBBI;
1074 unsigned Opcode = MI->getOpcode();
1075 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1076 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1077 const MachineOperand &BaseOp = MI->getOperand(2);
1078 unsigned BaseReg = BaseOp.getReg();
1079 unsigned EvenReg = MI->getOperand(0).getReg();
1080 unsigned OddReg = MI->getOperand(1).getReg();
1081 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1082 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1083 // ARM errata 602117: LDRD with base in list may result in incorrect base
1084 // register when interrupted or faulted.
1085 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1086 if (!Errata602117 &&
1087 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1090 MachineBasicBlock::iterator NewBBI = MBBI;
1091 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1092 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1093 bool EvenDeadKill = isLd ?
1094 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1095 bool EvenUndef = MI->getOperand(0).isUndef();
1096 bool OddDeadKill = isLd ?
1097 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1098 bool OddUndef = MI->getOperand(1).isUndef();
1099 bool BaseKill = BaseOp.isKill();
1100 bool BaseUndef = BaseOp.isUndef();
1101 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1102 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1103 int OffImm = getMemoryOpOffset(MI);
1104 unsigned PredReg = 0;
1105 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1107 if (OddRegNum > EvenRegNum && OffImm == 0) {
// Ascending register numbers and no offset. It's safe to change it to a
// ldm or stm.
1110 unsigned NewOpc = (isLd)
1111 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1112 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1114 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1115 .addReg(BaseReg, getKillRegState(BaseKill))
1116 .addImm(Pred).addReg(PredReg)
1117 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1118 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1121 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1122 .addReg(BaseReg, getKillRegState(BaseKill))
1123 .addImm(Pred).addReg(PredReg)
1125 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1127 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1130 NewBBI = llvm::prior(MBBI);
1132 // Split into two instructions.
1133 unsigned NewOpc = (isLd)
1134 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1135 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1136 DebugLoc dl = MBBI->getDebugLoc();
// If this is a load and the base register is killed, it may have been
// re-defed by the load; make sure the first load does not clobber it.
1140 (BaseKill || OffKill) &&
1141 (TRI->regsOverlap(EvenReg, BaseReg))) {
1142 assert(!TRI->regsOverlap(OddReg, BaseReg));
1143 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1144 OddReg, OddDeadKill, false,
1145 BaseReg, false, BaseUndef, false, OffUndef,
1146 Pred, PredReg, TII, isT2);
1147 NewBBI = llvm::prior(MBBI);
1148 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1149 EvenReg, EvenDeadKill, false,
1150 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1151 Pred, PredReg, TII, isT2);
1153 if (OddReg == EvenReg && EvenDeadKill) {
1154 // If the two source operands are the same, the kill marker is
1155 // probably on the first one. e.g.
1156 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1157 EvenDeadKill = false;
1160 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1161 EvenReg, EvenDeadKill, EvenUndef,
1162 BaseReg, false, BaseUndef, false, OffUndef,
1163 Pred, PredReg, TII, isT2);
1164 NewBBI = llvm::prior(MBBI);
1165 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1166 OddReg, OddDeadKill, OddUndef,
1167 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1168 Pred, PredReg, TII, isT2);
1183 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1184 /// ops of the same base and incrementing offset into LDM / STM ops.
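/// For example (illustrative):
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// can become a single "ldmia r4, {r0, r1, r2}".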
1185 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1186 unsigned NumMerges = 0;
1187 unsigned NumMemOps = 0;
1189 unsigned CurrBase = 0;
1191 unsigned CurrSize = 0;
1192 ARMCC::CondCodes CurrPred = ARMCC::AL;
1193 unsigned CurrPredReg = 0;
1194 unsigned Position = 0;
1195 SmallVector<MachineBasicBlock::iterator,4> Merges;
1197 RS->enterBasicBlock(&MBB);
1198 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1200 if (FixInvalidRegPairOp(MBB, MBBI))
1203 bool Advance = false;
1204 bool TryMerge = false;
1205 bool Clobber = false;
1207 bool isMemOp = isMemoryOp(MBBI);
1209 int Opcode = MBBI->getOpcode();
1210 unsigned Size = getLSMultipleTransferSize(MBBI);
1211 const MachineOperand &MO = MBBI->getOperand(0);
1212 unsigned Reg = MO.getReg();
1213 bool isKill = MO.isDef() ? false : MO.isKill();
1214 unsigned Base = MBBI->getOperand(1).getReg();
1215 unsigned PredReg = 0;
1216 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1217 int Offset = getMemoryOpOffset(MBBI);
// Watch out for:
// r4 := ldr [r5]
// r5 := ldr [r5, #4]
1221 // r6 := ldr [r5, #8]
1223 // The second ldr has effectively broken the chain even though it
1224 // looks like the later ldr(s) use the same base register. Try to
1225 // merge the ldr's so far, including this one. But don't try to
1226 // combine the following ldr(s).
1227 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1228 if (CurrBase == 0 && !Clobber) {
1229 // Start of a new chain.
1234 CurrPredReg = PredReg;
1235 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1244 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1245 // No need to match PredReg.
1246 // Continue adding to the queue.
1247 if (Offset > MemOps.back().Offset) {
1248 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1253 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1255 if (Offset < I->Offset) {
1256 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1261 } else if (Offset == I->Offset) {
1262 // Collision! This can't be merged!
1271 if (MBBI->isDebugValue()) {
1274 // Reach the end of the block, try merging the memory instructions.
1276 } else if (Advance) {
1280 // Reach the end of the block, try merging the memory instructions.
1286 if (NumMemOps > 1) {
1287 // Try to find a free register to use as a new base in case it's needed.
1288 // First advance to the instruction just before the start of the chain.
1289 AdvanceRS(MBB, MemOps);
1290 // Find a scratch register.
1291 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1292 // Process the load / store instructions.
1293 RS->forward(prior(MBBI));
1297 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1298 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
// Try folding preceding/trailing base inc/dec into the generated
// LDM/STM ops.
1302 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1303 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1305 NumMerges += Merges.size();
// Try folding preceding/trailing base inc/dec into those loads / stores
// that were not merged to form LDM/STM ops.
1309 for (unsigned i = 0; i != NumMemOps; ++i)
1310 if (!MemOps[i].Merged)
1311 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1314 // RS may be pointing to an instruction that's deleted.
1315 RS->skipTo(prior(MBBI));
1316 } else if (NumMemOps == 1) {
// Try folding preceding/trailing base inc/dec into the single
// load / store.
1319 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1321 RS->forward(prior(MBBI));
1328 CurrPred = ARMCC::AL;
1335 // If iterator hasn't been advanced and this is not a memory op, skip it.
1336 // It can't start a new chain anyway.
1337 if (!Advance && !isMemOp && MBBI != E) {
1343 return NumMerges > 0;
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
1356 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1357 if (MBB.empty()) return false;
1359 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1360 if (MBBI != MBB.begin() &&
1361 (MBBI->getOpcode() == ARM::BX_RET ||
1362 MBBI->getOpcode() == ARM::tBX_RET ||
1363 MBBI->getOpcode() == ARM::MOVPCLR)) {
1364 MachineInstr *PrevMI = prior(MBBI);
1365 unsigned Opcode = PrevMI->getOpcode();
1366 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1367 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1368 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1369 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1370 if (MO.getReg() != ARM::LR)
1372 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1373 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1374 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1375 PrevMI->setDesc(TII->get(NewOpc));
1377 PrevMI->copyImplicitOps(&*MBBI);
1385 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1386 const TargetMachine &TM = Fn.getTarget();
1387 AFI = Fn.getInfo<ARMFunctionInfo>();
1388 TII = TM.getInstrInfo();
1389 TRI = TM.getRegisterInfo();
1390 STI = &TM.getSubtarget<ARMSubtarget>();
1391 RS = new RegScavenger();
1392 isThumb2 = AFI->isThumb2Function();
1394 bool Modified = false;
1395 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1397 MachineBasicBlock &MBB = *MFI;
1398 Modified |= LoadStoreMultipleOpti(MBB);
1399 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1400 Modified |= MergeReturnIntoLDM(MBB);
/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
1415 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1417 const TargetData *TD;
1418 const TargetInstrInfo *TII;
1419 const TargetRegisterInfo *TRI;
1420 const ARMSubtarget *STI;
1421 MachineRegisterInfo *MRI;
1422 MachineFunction *MF;
1424 virtual bool runOnMachineFunction(MachineFunction &Fn);
1426 virtual const char *getPassName() const {
1427 return "ARM pre- register allocation load / store optimization pass";
1431 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1432 unsigned &NewOpc, unsigned &EvenReg,
1433 unsigned &OddReg, unsigned &BaseReg,
1435 unsigned &PredReg, ARMCC::CondCodes &Pred,
1437 bool RescheduleOps(MachineBasicBlock *MBB,
1438 SmallVector<MachineInstr*, 4> &Ops,
1439 unsigned Base, bool isLd,
1440 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1441 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1443 char ARMPreAllocLoadStoreOpt::ID = 0;
1446 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1447 TD = Fn.getTarget().getTargetData();
1448 TII = Fn.getTarget().getInstrInfo();
1449 TRI = Fn.getTarget().getRegisterInfo();
1450 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1451 MRI = &Fn.getRegInfo();
1454 bool Modified = false;
1455 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1457 Modified |= RescheduleLoadStoreInstrs(MFI);
1462 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1463 MachineBasicBlock::iterator I,
1464 MachineBasicBlock::iterator E,
1465 SmallPtrSet<MachineInstr*, 4> &MemOps,
1466 SmallSet<unsigned, 4> &MemRegs,
1467 const TargetRegisterInfo *TRI) {
// Are there stores / loads / calls between them?
// FIXME: This is overly conservative. We should make use of alias information
// if possible.
1471 SmallSet<unsigned, 4> AddedRegPressure;
1473 if (I->isDebugValue() || MemOps.count(&*I))
1475 const MCInstrDesc &MCID = I->getDesc();
1476 if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
1478 if (isLd && MCID.mayStore())
1483 // It's not safe to move the first 'str' down.
1486 // str r4, [r0, #+4]
1487 if (MCID.mayStore())
1490 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1491 MachineOperand &MO = I->getOperand(j);
1494 unsigned Reg = MO.getReg();
1495 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1497 if (Reg != Base && !MemRegs.count(Reg))
1498 AddedRegPressure.insert(Reg);
1502 // Estimate register pressure increase due to the transformation.
if (MemRegs.size() <= 4)
  // OK if we are moving a small number of instructions.
  return true;
return AddedRegPressure.size() <= MemRegs.size() * 2;
/// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
1511 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1512 MachineInstr *Op1) {
1513 assert(MI->memoperands_empty() && "expected a new machineinstr");
1514 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1515 + (Op1->memoperands_end() - Op1->memoperands_begin());
1517 MachineFunction *MF = MI->getParent()->getParent();
1518 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1519 MachineSDNode::mmo_iterator MemEnd =
1520 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1522 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1523 MI->setMemRefs(MemBegin, MemEnd);
1527 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1529 unsigned &NewOpc, unsigned &EvenReg,
1530 unsigned &OddReg, unsigned &BaseReg,
1531 int &Offset, unsigned &PredReg,
1532 ARMCC::CondCodes &Pred,
1534 // Make sure we're allowed to generate LDRD/STRD.
1535 if (!STI->hasV5TEOps())
1538 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1540 unsigned Opcode = Op0->getOpcode();
1541 if (Opcode == ARM::LDRi12)
1543 else if (Opcode == ARM::STRi12)
1545 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1546 NewOpc = ARM::t2LDRDi8;
1549 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1550 NewOpc = ARM::t2STRDi8;
1556 // Make sure the base address satisfies i64 ld / st alignment requirement.
1557 if (!Op0->hasOneMemOperand() ||
1558 !(*Op0->memoperands_begin())->getValue() ||
1559 (*Op0->memoperands_begin())->isVolatile())
1562 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1563 const Function *Func = MF->getFunction();
unsigned ReqAlign = STI->hasV6Ops()
  ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
  : 8;  // Pre-v6 needs 8-byte alignment.
1567 if (Align < ReqAlign)
1570 // Then make sure the immediate offset fits.
1571 int OffImm = getMemoryOpOffset(Op0);
1573 int Limit = (1 << 8) * Scale;
1574 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1578 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1580 AddSub = ARM_AM::sub;
1583 int Limit = (1 << 8) * Scale;
1584 if (OffImm >= Limit || (OffImm & (Scale-1)))
1586 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1588 EvenReg = Op0->getOperand(0).getReg();
1589 OddReg = Op1->getOperand(0).getReg();
1590 if (EvenReg == OddReg)
1592 BaseReg = Op0->getOperand(1).getReg();
1593 Pred = llvm::getInstrPredicate(Op0, PredReg);
1594 dl = Op0->getDebugLoc();
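// Orders load / store instructions on the same base by decreasing memory
// offset, so the std::sort below places the largest offset first.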
1599 struct OffsetCompare {
1600 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1601 int LOffset = getMemoryOpOffset(LHS);
1602 int ROffset = getMemoryOpOffset(RHS);
1603 assert(LHS == RHS || LOffset != ROffset);
1604 return LOffset > ROffset;
1609 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1610 SmallVector<MachineInstr*, 4> &Ops,
1611 unsigned Base, bool isLd,
1612 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1613 bool RetVal = false;
1615 // Sort by offset (in reverse order).
1616 std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1618 // The loads / stores of the same base are in order. Scan them from first to
1619 // last and check for the following:
// 1. Any def of base.
// 2. Any gaps.
1622 while (Ops.size() > 1) {
1623 unsigned FirstLoc = ~0U;
1624 unsigned LastLoc = 0;
1625 MachineInstr *FirstOp = 0;
1626 MachineInstr *LastOp = 0;
1628 unsigned LastOpcode = 0;
1629 unsigned LastBytes = 0;
1630 unsigned NumMove = 0;
1631 for (int i = Ops.size() - 1; i >= 0; --i) {
1632 MachineInstr *Op = Ops[i];
1633 unsigned Loc = MI2LocMap[Op];
1634 if (Loc <= FirstLoc) {
1638 if (Loc >= LastLoc) {
1643 unsigned Opcode = Op->getOpcode();
1644 if (LastOpcode && Opcode != LastOpcode)
1647 int Offset = getMemoryOpOffset(Op);
1648 unsigned Bytes = getLSMultipleTransferSize(Op);
1650 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1653 LastOffset = Offset;
1655 LastOpcode = Opcode;
1656 if (++NumMove == 8) // FIXME: Tune this limit.
1663 SmallPtrSet<MachineInstr*, 4> MemOps;
1664 SmallSet<unsigned, 4> MemRegs;
1665 for (int i = NumMove-1; i >= 0; --i) {
1666 MemOps.insert(Ops[i]);
1667 MemRegs.insert(Ops[i]->getOperand(0).getReg());
// Be conservative: if the instructions are too far apart, don't
// move them. We want to limit the increase of register pressure.
1672 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1674 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1675 MemOps, MemRegs, TRI);
1677 for (unsigned i = 0; i != NumMove; ++i)
1680 // This is the new location for the loads / stores.
1681 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1682 while (InsertPos != MBB->end()
1683 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
// If we are moving a pair of loads / stores, see if it makes sense
// to try to allocate a pair of registers that can form a register pair.
1688 MachineInstr *Op0 = Ops.back();
1689 MachineInstr *Op1 = Ops[Ops.size()-2];
1690 unsigned EvenReg = 0, OddReg = 0;
1691 unsigned BaseReg = 0, PredReg = 0;
1692 ARMCC::CondCodes Pred = ARMCC::AL;
1694 unsigned NewOpc = 0;
1697 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1698 EvenReg, OddReg, BaseReg,
1699 Offset, PredReg, Pred, isT2)) {
1703 const MCInstrDesc &MCID = TII->get(NewOpc);
1704 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
1705 MRI->constrainRegClass(EvenReg, TRC);
1706 MRI->constrainRegClass(OddReg, TRC);
1708 // Form the pair instruction.
1710 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1711 .addReg(EvenReg, RegState::Define)
1712 .addReg(OddReg, RegState::Define)
// FIXME: We're converting from LDRi12 to an insn that still
// uses addrmode2, so we need an explicit offset reg. It should
// always be reg0 since we're transforming LDRi12s.
1719 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1720 concatenateMemOperands(MIB, Op0, Op1);
1721 DEBUG(dbgs() << "Formed " << *MIB << "\n");
1724 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
// FIXME: We're converting from STRi12 to an insn that still
// uses addrmode2, so we need an explicit offset reg. It should
// always be reg0 since we're transforming STRi12s.
1733 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1734 concatenateMemOperands(MIB, Op0, Op1);
1735 DEBUG(dbgs() << "Formed " << *MIB << "\n");
1741 // Add register allocation hints to form register pairs.
1742 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1743 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
1745 for (unsigned i = 0; i != NumMove; ++i) {
1746 MachineInstr *Op = Ops.back();
1748 MBB->splice(InsertPos, MBB, Op);
1752 NumLdStMoved += NumMove;
1762 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1763 bool RetVal = false;
1765 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1766 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1767 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1768 SmallVector<unsigned, 4> LdBases;
1769 SmallVector<unsigned, 4> StBases;
1772 MachineBasicBlock::iterator MBBI = MBB->begin();
1773 MachineBasicBlock::iterator E = MBB->end();
1775 for (; MBBI != E; ++MBBI) {
1776 MachineInstr *MI = MBBI;
1777 const MCInstrDesc &MCID = MI->getDesc();
1778 if (MCID.isCall() || MCID.isTerminator()) {
1779 // Stop at barriers.
1784 if (!MI->isDebugValue())
1785 MI2LocMap[MI] = ++Loc;
1787 if (!isMemoryOp(MI))
1789 unsigned PredReg = 0;
1790 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1793 int Opc = MI->getOpcode();
1794 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1795 unsigned Base = MI->getOperand(1).getReg();
1796 int Offset = getMemoryOpOffset(MI);
1798 bool StopHere = false;
1800 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1801 Base2LdsMap.find(Base);
1802 if (BI != Base2LdsMap.end()) {
1803 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1804 if (Offset == getMemoryOpOffset(BI->second[i])) {
1810 BI->second.push_back(MI);
1812 SmallVector<MachineInstr*, 4> MIs;
1814 Base2LdsMap[Base] = MIs;
1815 LdBases.push_back(Base);
1818 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1819 Base2StsMap.find(Base);
1820 if (BI != Base2StsMap.end()) {
1821 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1822 if (Offset == getMemoryOpOffset(BI->second[i])) {
1828 BI->second.push_back(MI);
1830 SmallVector<MachineInstr*, 4> MIs;
1832 Base2StsMap[Base] = MIs;
1833 StBases.push_back(Base);
1838 // Found a duplicate (a base+offset combination that's seen earlier).
1845 // Re-schedule loads.
1846 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1847 unsigned Base = LdBases[i];
1848 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1850 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1853 // Re-schedule stores.
1854 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1855 unsigned Base = StBases[i];
1856 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1858 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1862 Base2LdsMap.clear();
1863 Base2StsMap.clear();
1873 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1874 /// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}