//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM load / store optimization pass";

  struct MemOpQueueEntry {
    MachineBasicBlock::iterator MBBI;

    MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                    MachineBasicBlock::iterator i)
      : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}

  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
  typedef MemOpQueue::iterator MemOpQueueIter;
  bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                int Offset, unsigned Base, bool BaseKill, int Opcode,
                ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                ArrayRef<std::pair<unsigned, bool> > Regs,
                ArrayRef<unsigned> ImpDefs);
  void MergeOpsUpdate(MachineBasicBlock &MBB,
                      unsigned memOpsBegin,
                      unsigned insertAfter,
                      ARMCC::CondCodes Pred,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
  void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                    int Opcode, unsigned Size,
                    ARMCC::CondCodes Pred, unsigned PredReg,
                    unsigned Scratch, MemOpQueue &MemOps,
                    SmallVectorImpl<MachineBasicBlock::iterator> &Merges);

  void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const TargetInstrInfo *TII,
                                MachineBasicBlock::iterator &I);
  bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 MachineBasicBlock::iterator &I);
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);

char ARMLoadStoreOpt::ID = 0;
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA_UPD:

  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:

} // end namespace ARM_AM
} // end namespace llvm
static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that will be loaded /
/// stored. It returns true if the transformation is done.
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base. But only do so if it is cost effective, i.e. merging more
    // than two loads / stores.
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
      // Otherwise, use the scratch register as the new base.

    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;

    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);

    BaseKill = true; // The new base is always killed right after its use.

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
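  // KilledRegs collects registers whose kill point comes before the merge
  // insertion point; Killer maps each such register to the index of the memop
  // that currently carries its kill flag, so that flag can be cleared if the
  // merged instruction takes over the kill.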
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;

      memOps[i].isKill = true;

    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
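  // Remember the encoding value of the previous register so the scan below
  // can require ascending (and, for VFP, consecutive) register numbers.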
  unsigned Limit = ~0U;
  // vldm / vstm limits are 32 for S variants, 16 for D variants.

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {

      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,

    if (MemOps[i].Position > MemOps[insertAfter].Position)

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
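  // A "matching" decrement is a SUB of the same base register by exactly
  // Bytes, under the same predicate, that leaves no live CPSR definition.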
  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))

  return CheckCPSRDef ? !definesCPSR(MI) : true;

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))

  return CheckCPSRDef ? !definesCPSR(MI) : true;
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {

    // The register list is the variadic tail of the operand list; count its
    // length and scale by 4 bytes per register (GPR / S registers).
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;

    // Same computation, scaled by 8 bytes per register (D registers).
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  default: llvm_unreachable("Unhandled opcode!");
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;

    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
///   stmia rn, <ra, rb, rc>
///   rn := rn + 4 * 3;
/// =>
///   stmia rn!, <ra, rb, rc>
///
///   rn := rn - 4 * 3;
///   ldmia rn, <ra, rb, rc>
/// =>
///   ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
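  // A preceding or following add/sub of the base by exactly the transfer size
  // can be folded into the multiple as base writeback by switching to the
  // corresponding _UPD opcode.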
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of the operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
    return ARM::LDR_PRE_IMM;
    return ARM::STR_PRE_IMM;
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    return ARM::t2LDR_PRE;
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
    return ARM::LDR_POST_IMM;
    return ARM::STR_POST_IMM;
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    return ARM::t2LDR_POST;
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;

  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
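  // Limit bounds the add/sub immediate that can be folded as an indexed
  // offset; 0 (the AM5 case) skips the size check in the matching helpers.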
  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {

      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {

      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);

    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
    int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
      .addReg(Base, RegState::Define)
      .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);

    int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
      .addReg(Base, RegState::Define)
      .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);

    int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
    // t2LDR_PRE, t2LDR_POST
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
      .addReg(Base, RegState::Define)
      .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);

    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);

      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI->hasOneMemOperand())

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())

  int Opcode = MI->getOpcode();
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();
    return MI->getOperand(1).isReg();
/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
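  // The immediate offset is always the third-from-last operand; the i12 and
  // Thumb2 forms store it as a plain byte offset, while the AM3/AM5 forms
  // below need the encoded field decoded first.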
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)

  if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);

    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);

      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));

      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
          getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));

      NewBBI = llvm::prior(MBBI);

      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);

        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;

        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;

  unsigned CurrBase = 0;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;
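  // CurrBase, CurrOpc, CurrSize and CurrPred describe the chain of memops
  // currently accumulated in MemOps; the chain is flushed and merged whenever
  // the base, opcode or predicate changes, or a barrier is reached.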
  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
    if (FixInvalidRegPairOp(MBB, MBBI))

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);

      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      //   r4 := ldr [r0, #8]
      //   r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write-after-write (WAW) dependence. The same
      // applies to str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));

      } else if (!Overlap) {

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,

            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,

              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
    if (MBBI->isDebugValue()) {

      // Reached the end of the block; try merging the memory instructions.
    } else if (Advance) {

      // Reached the end of the block; try merging the memory instructions.

      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {

          RS->forward(prior(MBBI));

      CurrPred = ARMCC::AL;

    // If the iterator hasn't been advanced and this is not a memory op,
    // skip it. It can't start a new chain anyway.
    if (!Advance && !isMemOp && MBBI != E) {

  return NumMerges > 0;
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
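      // The popped registers are the trailing operands; only fold when the
      // last one is LR, i.e. when LR is the value that would become the pc.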
      if (MO.getReg() != ARM::LR)

      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));

      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
  ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

  const DataLayout *TD;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  MachineRegisterInfo *MRI;
  MachineFunction *MF;

  virtual bool runOnMachineFunction(MachineFunction &Fn);

  virtual const char *getPassName() const {
    return "ARM pre- register allocation load / store optimization pass";

  bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                        unsigned &NewOpc, unsigned &EvenReg,
                        unsigned &OddReg, unsigned &BaseReg,
                        unsigned &PredReg, ARMCC::CondCodes &Pred,
  bool RescheduleOps(MachineBasicBlock *MBB,
                     SmallVectorImpl<MachineInstr *> &Ops,
                     unsigned Base, bool isLd,
                     DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);

char ARMPreAllocLoadStoreOpt::ID = 0;
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
    Modified |= RescheduleLoadStoreInstrs(MFI);
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  SmallSet<unsigned, 4> AddedRegPressure;
    if (I->isDebugValue() || MemOps.count(&*I))
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
    if (isLd && I->mayStore())

      // It's not safe to move the first 'str' down.
      //   str r4, [r0, #+4]

    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
  return AddedRegPressure.size() <= MemRegs.size() * 2;
/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
  else if (Opcode == ARM::STRi12)
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;

  // Make sure the base address satisfies the i64 ld / st alignment requirement.
  // At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8; // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
      AddSub = ARM_AM::sub;

    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);

  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
struct OffsetCompare {
  bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVectorImpl<MachineInstr *> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;

    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {

      if (Loc >= LastLoc) {

        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))

      LastOffset = Offset;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.

      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
        for (unsigned i = 0; i != NumMove; ++i)

        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;

        unsigned NewOpc = 0;

        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              // FIXME: We're converting from LDRi12 to an insn that still
              // uses addrmode2, so we need an explicit offset reg. It should
              // always be reg0 since we're transforming LDRi12s.
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");

            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              // FIXME: We're converting from LDRi12 to an insn that still
              // uses addrmode2, so we need an explicit offset reg. It should
              // always be reg0 since we're transforming STRi12s.
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);

          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();

            MBB->splice(InsertPos, MBB, Op);

      NumLdStMoved += NumMove;
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();

  for (; MBBI != E; ++MBBI) {
    MachineInstr *MI = MBBI;
    if (MI->isCall() || MI->isTerminator()) {
      // Stop at barriers.

    if (!MI->isDebugValue())
      MI2LocMap[MI] = ++Loc;
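    // MI2LocMap assigns each instruction an ordinal position in the block;
    // RescheduleOps later uses it to keep reordered memops close together.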
    if (!isMemoryOp(MI))
    unsigned PredReg = 0;
    if (getInstrPredicate(MI, PredReg) != ARMCC::AL)

    int Opc = MI->getOpcode();
    bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
    unsigned Base = MI->getOperand(1).getReg();
    int Offset = getMemoryOpOffset(MI);

    bool StopHere = false;

      DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
        Base2LdsMap.find(Base);
      if (BI != Base2LdsMap.end()) {
        for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
          if (Offset == getMemoryOpOffset(BI->second[i])) {

          BI->second.push_back(MI);

        Base2LdsMap[Base].push_back(MI);
        LdBases.push_back(Base);

      DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
        Base2StsMap.find(Base);
      if (BI != Base2StsMap.end()) {
        for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
          if (Offset == getMemoryOpOffset(BI->second[i])) {

          BI->second.push_back(MI);

        Base2StsMap[Base].push_back(MI);
        StBases.push_back(Base);

    // Found a duplicate (a base+offset combination that's seen earlier).
    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);

    Base2LdsMap.clear();
    Base2StsMap.clear();
/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();