1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that performs load / store related peephole
11 // optimizations. This pass should be run after register allocation.
13 //===----------------------------------------------------------------------===//
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMMachineFunctionInfo.h"
19 #include "ARMSubtarget.h"
20 #include "MCTargetDesc/ARMAddressingModes.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/RegisterScavenging.h"
33 #include "llvm/CodeGen/SelectionDAGNodes.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Target/TargetInstrInfo.h"
40 #include "llvm/Target/TargetMachine.h"
41 #include "llvm/Target/TargetRegisterInfo.h"
44 #define DEBUG_TYPE "arm-ldst-opt"
46 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
47 STATISTIC(NumSTMGened , "Number of stm instructions generated");
48 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
49 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
50 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
51 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
52 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
53 STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
54 STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
55 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
56 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
58 /// ARMLoadStoreOpt - Post-register allocation pass that combines
59 /// load / store instructions to form ldm / stm instructions.
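/// A hedged illustration of the rewrite this pass aims for (the register
/// assignment here is hypothetical):
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// =>
///   ldmia r4, {r0, r1, r2}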
62 struct ARMLoadStoreOpt : public MachineFunctionPass {
64 ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
66 const TargetInstrInfo *TII;
67 const TargetRegisterInfo *TRI;
68 const ARMSubtarget *STI;
73 bool runOnMachineFunction(MachineFunction &Fn) override;
75 const char *getPassName() const override {
76 return "ARM load / store optimization pass";
80 struct MemOpQueueEntry {
85 MachineBasicBlock::iterator MBBI;
87 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
88 MachineBasicBlock::iterator i)
89 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
91 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
92 typedef MemOpQueue::iterator MemOpQueueIter;
94 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
95 const MemOpQueue &MemOps, unsigned DefReg,
96 unsigned RangeBegin, unsigned RangeEnd);
98 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
99 int Offset, unsigned Base, bool BaseKill, int Opcode,
100 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
102 ArrayRef<std::pair<unsigned, bool> > Regs,
103 ArrayRef<unsigned> ImpDefs);
104 void MergeOpsUpdate(MachineBasicBlock &MBB,
106 unsigned memOpsBegin,
108 unsigned insertAfter,
113 ARMCC::CondCodes Pred,
117 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
118 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
119 int Opcode, unsigned Size,
120 ARMCC::CondCodes Pred, unsigned PredReg,
121 unsigned Scratch, MemOpQueue &MemOps,
122 SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
124 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
125 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
126 MachineBasicBlock::iterator &MBBI);
127 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
128 MachineBasicBlock::iterator MBBI,
129 const TargetInstrInfo *TII,
131 MachineBasicBlock::iterator &I);
132 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
133 MachineBasicBlock::iterator MBBI,
135 MachineBasicBlock::iterator &I);
136 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
137 bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
139 char ARMLoadStoreOpt::ID = 0;
142 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
144 default: llvm_unreachable("Unhandled opcode!");
148 default: llvm_unreachable("Unhandled submode!");
149 case ARM_AM::ia: return ARM::LDMIA;
150 case ARM_AM::da: return ARM::LDMDA;
151 case ARM_AM::db: return ARM::LDMDB;
152 case ARM_AM::ib: return ARM::LDMIB;
157 default: llvm_unreachable("Unhandled submode!");
158 case ARM_AM::ia: return ARM::STMIA;
159 case ARM_AM::da: return ARM::STMDA;
160 case ARM_AM::db: return ARM::STMDB;
161 case ARM_AM::ib: return ARM::STMIB;
167 default: llvm_unreachable("Unhandled submode!");
168 case ARM_AM::ia: return ARM::t2LDMIA;
169 case ARM_AM::db: return ARM::t2LDMDB;
175 default: llvm_unreachable("Unhandled submode!");
176 case ARM_AM::ia: return ARM::t2STMIA;
177 case ARM_AM::db: return ARM::t2STMDB;
182 default: llvm_unreachable("Unhandled submode!");
183 case ARM_AM::ia: return ARM::VLDMSIA;
184 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
189 default: llvm_unreachable("Unhandled submode!");
190 case ARM_AM::ia: return ARM::VSTMSIA;
191 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
196 default: llvm_unreachable("Unhandled submode!");
197 case ARM_AM::ia: return ARM::VLDMDIA;
198 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
203 default: llvm_unreachable("Unhandled submode!");
204 case ARM_AM::ia: return ARM::VSTMDIA;
205 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
213 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
215 default: llvm_unreachable("Unhandled opcode!");
221 case ARM::t2LDMIA_RET:
223 case ARM::t2LDMIA_UPD:
225 case ARM::t2STMIA_UPD:
227 case ARM::VLDMSIA_UPD:
229 case ARM::VSTMSIA_UPD:
231 case ARM::VLDMDIA_UPD:
233 case ARM::VSTMDIA_UPD:
247 case ARM::t2LDMDB_UPD:
249 case ARM::t2STMDB_UPD:
250 case ARM::VLDMSDB_UPD:
251 case ARM::VSTMSDB_UPD:
252 case ARM::VLDMDDB_UPD:
253 case ARM::VSTMDDB_UPD:
264 } // end namespace ARM_AM
265 } // end namespace llvm
267 static bool isT2i32Load(unsigned Opc) {
268 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
271 static bool isi32Load(unsigned Opc) {
272 return Opc == ARM::LDRi12 || isT2i32Load(Opc);
275 static bool isT2i32Store(unsigned Opc) {
276 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
279 static bool isi32Store(unsigned Opc) {
280 return Opc == ARM::STRi12 || isT2i32Store(Opc);
283 /// MergeOps - Create and insert an LDM or STM with Base as base register and
284 /// registers in Regs as the register operands that would be loaded / stored.
285 /// It returns true if the transformation is done.
287 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
288 MachineBasicBlock::iterator MBBI,
289 int Offset, unsigned Base, bool BaseKill,
290 int Opcode, ARMCC::CondCodes Pred,
291 unsigned PredReg, unsigned Scratch, DebugLoc dl,
292 ArrayRef<std::pair<unsigned, bool> > Regs,
293 ArrayRef<unsigned> ImpDefs) {
294 // Only a single register to load / store. Don't bother.
295 unsigned NumRegs = Regs.size();
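// Sketch of the submode selection below for three core registers
// (illustrative; the exact conditions are in the code that follows):
//   Offset ==   4 -> ib (increment before)
//   Offset ==  -8 -> da (decrement after)
//   Offset == -12 -> db (decrement before)
//   Offset ==   0 -> ia (increment after)
// Any other starting offset requires materializing a new base register first.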
299 ARM_AM::AMSubMode Mode = ARM_AM::ia;
300 // VFP and Thumb2 do not support IB or DA modes.
301 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
302 bool haveIBAndDA = isNotVFP && !isThumb2;
303 if (Offset == 4 && haveIBAndDA)
305 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
307 else if (Offset == -4 * (int)NumRegs && isNotVFP)
308 // VLDM/VSTM do not support DB mode without also updating the base reg.
310 else if (Offset != 0) {
311 // Check if this is a supported opcode before we insert instructions to
312 // calculate a new base register.
313 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
315 // If starting offset isn't zero, insert an MI to materialize a new base.
316 // But only do so if it is cost effective, i.e. merging more than two
317 // loads / stores.
322 if (isi32Load(Opcode))
323 // If it is a load, then just use one of the destination registers
324 // as the new base.
325 NewBase = Regs[NumRegs-1].first;
327 // Use the scratch register as the new base.
332 int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
334 BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
337 int ImmedOffset = isThumb2
338 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
339 if (ImmedOffset == -1)
340 // FIXME: Try t2ADDri12 or t2SUBri12?
341 return false; // Probably not worth it then.
343 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
344 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
345 .addImm(Pred).addReg(PredReg).addReg(0);
347 BaseKill = true; // New base is always killed right after its use.
350 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
351 Opcode == ARM::VLDRD);
352 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
353 if (!Opcode) return false;
354 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
355 .addReg(Base, getKillRegState(BaseKill))
356 .addImm(Pred).addReg(PredReg);
357 for (unsigned i = 0; i != NumRegs; ++i)
358 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
359 | getKillRegState(Regs[i].second));
361 // Add implicit defs for super-registers.
362 for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
363 MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
368 /// \brief Find all instructions using a given imp-def within a range.
370 /// We are trying to combine a range of instructions, one of which (located at
371 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
372 /// be placed at RangeEnd, and so any uses of this definition between RangeBegin
373 /// and RangeEnd must be modified to use an undefined value.
375 /// The live range continues until we find a second definition or one of the
376 /// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
377 /// we must consider all uses and decide which are relevant in a second pass.
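///
/// Illustrative scenario (hypothetical registers): a VLDRS that defines s0
/// may also implicitly define the containing d0. If another instruction in
/// the merge range reads d0, that read has to be flagged undef once the VLDRS
/// is folded into a VLDM emitted at RangeEnd.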
378 void ARMLoadStoreOpt::findUsesOfImpDef(
379 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
380 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
381 std::map<unsigned, MachineOperand *> Uses;
382 unsigned LastLivePos = RangeEnd;
384 // First we find all uses of this register with Position between RangeBegin
385 // and RangeEnd, any or all of these could be uses of a definition at
386 // RangeBegin. We also record the latest position a definition at RangeBegin
387 // would be considered live.
388 for (unsigned i = 0; i < MemOps.size(); ++i) {
389 MachineInstr &MI = *MemOps[i].MBBI;
390 unsigned MIPosition = MemOps[i].Position;
391 if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
394 // If this instruction defines the register, then any later use will be of
395 // that definition rather than ours.
396 if (MI.definesRegister(DefReg))
397 LastLivePos = std::min(LastLivePos, MIPosition);
399 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
403 // If this instruction kills the register then (assuming liveness is
404 // correct when we start) we don't need to think about anything after here.
406 LastLivePos = std::min(LastLivePos, MIPosition);
408 Uses[MIPosition] = UseOp;
411 // Now we traverse the list of all uses, and append the ones that actually use
412 // our definition to the requested list.
413 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
416 // List is sorted by position so once we've found one out of range there
417 // will be no more to consider.
418 if (I->first > LastLivePos)
420 UsesOfImpDefs.push_back(I->second);
424 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
425 // success.
426 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
428 unsigned memOpsBegin, unsigned memOpsEnd,
429 unsigned insertAfter, int Offset,
430 unsigned Base, bool BaseKill,
432 ARMCC::CondCodes Pred, unsigned PredReg,
435 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
436 // First calculate which of the registers should be killed by the merged
437 // instruction.
438 const unsigned insertPos = memOps[insertAfter].Position;
439 SmallSet<unsigned, 4> KilledRegs;
440 DenseMap<unsigned, unsigned> Killer;
441 for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
442 if (i == memOpsBegin) {
447 if (memOps[i].Position < insertPos && memOps[i].isKill) {
448 unsigned Reg = memOps[i].Reg;
449 KilledRegs.insert(Reg);
454 SmallVector<std::pair<unsigned, bool>, 8> Regs;
455 SmallVector<unsigned, 8> ImpDefs;
456 SmallVector<MachineOperand *, 8> UsesOfImpDefs;
457 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
458 unsigned Reg = memOps[i].Reg;
459 // If we are inserting the merged operation after an operation that
460 // uses the same register, make sure to transfer any kill flag.
461 bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
462 Regs.push_back(std::make_pair(Reg, isKill));
464 // Collect any implicit defs of super-registers. They must be preserved.
465 for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
466 if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
468 unsigned DefReg = MO->getReg();
469 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
470 ImpDefs.push_back(DefReg);
472 // There may be other uses of the definition between this instruction and
473 // the eventual LDM/STM position. These should be marked undef if the
474 // merge takes place.
475 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
480 // Try to do the merge.
481 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
483 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
484 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
487 // Merge succeeded, update records.
488 Merges.push_back(std::prev(Loc));
490 // In gathering loads together, we may have moved the imp-def of a register
491 // past one of its uses. This is OK, since we know better than the rest of
492 // LLVM what's OK with ARM loads and stores; but we still have to adjust the
493 // undef flags on those uses.
494 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
495 E = UsesOfImpDefs.end();
499 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
500 // Remove kill flags from any memops that come before insertPos.
501 if (Regs[i-memOpsBegin].second) {
502 unsigned Reg = Regs[i-memOpsBegin].first;
503 if (KilledRegs.count(Reg)) {
504 unsigned j = Killer[Reg];
505 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
506 assert(Idx >= 0 && "Cannot find killing operand");
507 memOps[j].MBBI->getOperand(Idx).setIsKill(false);
508 memOps[j].isKill = false;
510 memOps[i].isKill = true;
512 MBB.erase(memOps[i].MBBI);
513 // Update this memop to refer to the merged instruction.
514 // We may need to move kill flags again.
515 memOps[i].Merged = true;
516 memOps[i].MBBI = Merges.back();
517 memOps[i].Position = insertPos;
521 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
522 /// load / store multiple instructions.
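///
/// For example (illustrative): loads at offsets 0, 4 and 12 from one base
/// cannot form a single LDM because of the hole at offset 8, so the run is
/// merged as {0, 4} and the tail starting at offset 12 is retried recursively.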
524 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
525 unsigned Base, int Opcode, unsigned Size,
526 ARMCC::CondCodes Pred, unsigned PredReg,
527 unsigned Scratch, MemOpQueue &MemOps,
528 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
529 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
530 int Offset = MemOps[SIndex].Offset;
531 int SOffset = Offset;
532 unsigned insertAfter = SIndex;
533 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
534 DebugLoc dl = Loc->getDebugLoc();
535 const MachineOperand &PMO = Loc->getOperand(0);
536 unsigned PReg = PMO.getReg();
537 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
539 unsigned Limit = ~0U;
541 // vldm / vstm limits are 32 for S variants, 16 for D variants.
559 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
560 int NewOffset = MemOps[i].Offset;
561 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
562 unsigned Reg = MO.getReg();
563 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
564 // Register numbers must be in ascending order. For VFP / NEON load and
565 // store multiples, the registers must also be consecutive and within the
566 // limit on the number of registers per instruction.
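// E.g. (illustrative) ldr r1, [r4]; ldr r2, [r4, #4]; ldr r3, [r4, #8] can
// become ldmia r4, {r1, r2, r3}, but ldr r3, [r4]; ldr r1, [r4, #4] cannot,
// because the register at the lower address has the higher encoding.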
567 if (Reg != ARM::SP &&
568 NewOffset == Offset + (int)Size &&
569 ((isNotVFP && RegNum > PRegNum) ||
570 ((Count < Limit) && RegNum == PRegNum+1)) &&
571 // On Swift we don't want vldm/vstm to start with an odd register num
572 // because Q register unaligned vldm/vstm need more uops.
573 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
578 // Can't merge this in. Try merge the earlier ones first.
579 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
580 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
581 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
586 if (MemOps[i].Position > MemOps[insertAfter].Position)
590 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
591 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
592 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
596 static bool definesCPSR(MachineInstr *MI) {
597 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
598 const MachineOperand &MO = MI->getOperand(i);
601 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
602 // If the instruction has a live CPSR def, then it's not safe to fold it
603 // into load / store.
610 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
611 unsigned Bytes, unsigned Limit,
612 ARMCC::CondCodes Pred, unsigned PredReg) {
613 unsigned MyPredReg = 0;
617 bool CheckCPSRDef = false;
618 switch (MI->getOpcode()) {
619 default: return false;
628 // Make sure the offset fits in 8 bits.
629 if (Bytes == 0 || (Limit && Bytes >= Limit))
632 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
633 if (!(MI->getOperand(0).getReg() == Base &&
634 MI->getOperand(1).getReg() == Base &&
635 (MI->getOperand(2).getImm()*Scale) == Bytes &&
636 getInstrPredicate(MI, MyPredReg) == Pred &&
637 MyPredReg == PredReg))
640 return CheckCPSRDef ? !definesCPSR(MI) : true;
643 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
644 unsigned Bytes, unsigned Limit,
645 ARMCC::CondCodes Pred, unsigned PredReg) {
646 unsigned MyPredReg = 0;
650 bool CheckCPSRDef = false;
651 switch (MI->getOpcode()) {
652 default: return false;
661 if (Bytes == 0 || (Limit && Bytes >= Limit))
662 // Make sure the offset fits in 8 bits.
665 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
666 if (!(MI->getOperand(0).getReg() == Base &&
667 MI->getOperand(1).getReg() == Base &&
668 (MI->getOperand(2).getImm()*Scale) == Bytes &&
669 getInstrPredicate(MI, MyPredReg) == Pred &&
670 MyPredReg == PredReg))
673 return CheckCPSRDef ? !definesCPSR(MI) : true;
676 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
677 switch (MI->getOpcode()) {
705 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
708 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
712 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
713 ARM_AM::AMSubMode Mode) {
715 default: llvm_unreachable("Unhandled opcode!");
721 default: llvm_unreachable("Unhandled submode!");
722 case ARM_AM::ia: return ARM::LDMIA_UPD;
723 case ARM_AM::ib: return ARM::LDMIB_UPD;
724 case ARM_AM::da: return ARM::LDMDA_UPD;
725 case ARM_AM::db: return ARM::LDMDB_UPD;
732 default: llvm_unreachable("Unhandled submode!");
733 case ARM_AM::ia: return ARM::STMIA_UPD;
734 case ARM_AM::ib: return ARM::STMIB_UPD;
735 case ARM_AM::da: return ARM::STMDA_UPD;
736 case ARM_AM::db: return ARM::STMDB_UPD;
741 default: llvm_unreachable("Unhandled submode!");
742 case ARM_AM::ia: return ARM::t2LDMIA_UPD;
743 case ARM_AM::db: return ARM::t2LDMDB_UPD;
748 default: llvm_unreachable("Unhandled submode!");
749 case ARM_AM::ia: return ARM::t2STMIA_UPD;
750 case ARM_AM::db: return ARM::t2STMDB_UPD;
754 default: llvm_unreachable("Unhandled submode!");
755 case ARM_AM::ia: return ARM::VLDMSIA_UPD;
756 case ARM_AM::db: return ARM::VLDMSDB_UPD;
760 default: llvm_unreachable("Unhandled submode!");
761 case ARM_AM::ia: return ARM::VLDMDIA_UPD;
762 case ARM_AM::db: return ARM::VLDMDDB_UPD;
766 default: llvm_unreachable("Unhandled submode!");
767 case ARM_AM::ia: return ARM::VSTMSIA_UPD;
768 case ARM_AM::db: return ARM::VSTMSDB_UPD;
772 default: llvm_unreachable("Unhandled submode!");
773 case ARM_AM::ia: return ARM::VSTMDIA_UPD;
774 case ARM_AM::db: return ARM::VSTMDDB_UPD;
779 /// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
780 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
782 /// stmia rn, <ra, rb, rc>
783 /// rn := rn + 4 * 3;
784 /// =>
785 /// stmia rn!, <ra, rb, rc>
786 ///
787 /// rn := rn - 4 * 3;
788 /// ldmia rn, <ra, rb, rc>
789 /// =>
790 /// ldmdb rn!, <ra, rb, rc>
791 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
792 MachineBasicBlock::iterator MBBI,
794 MachineBasicBlock::iterator &I) {
795 MachineInstr *MI = MBBI;
796 unsigned Base = MI->getOperand(0).getReg();
797 bool BaseKill = MI->getOperand(0).isKill();
798 unsigned Bytes = getLSMultipleTransferSize(MI);
799 unsigned PredReg = 0;
800 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
801 int Opcode = MI->getOpcode();
802 DebugLoc dl = MI->getDebugLoc();
804 // Can't use an updating ld/st if the base register is also a dest
805 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
806 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
807 if (MI->getOperand(i).getReg() == Base)
810 bool DoMerge = false;
811 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
813 // Try merging with the previous instruction.
814 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
815 if (MBBI != BeginMBBI) {
816 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
817 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
819 if (Mode == ARM_AM::ia &&
820 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
823 } else if (Mode == ARM_AM::ib &&
824 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
832 // Try merging with the next instruction.
833 MachineBasicBlock::iterator EndMBBI = MBB.end();
834 if (!DoMerge && MBBI != EndMBBI) {
835 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
836 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
838 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
839 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
841 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
842 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
857 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
858 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
859 .addReg(Base, getDefRegState(true)) // WB base register
860 .addReg(Base, getKillRegState(BaseKill))
861 .addImm(Pred).addReg(PredReg);
863 // Transfer the rest of operands.
864 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
865 MIB.addOperand(MI->getOperand(OpNum));
867 // Transfer memoperands.
868 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
874 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
875 ARM_AM::AddrOpc Mode) {
878 return ARM::LDR_PRE_IMM;
880 return ARM::STR_PRE_IMM;
882 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
884 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
886 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
888 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
891 return ARM::t2LDR_PRE;
894 return ARM::t2STR_PRE;
895 default: llvm_unreachable("Unhandled opcode!");
899 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
900 ARM_AM::AddrOpc Mode) {
903 return ARM::LDR_POST_IMM;
905 return ARM::STR_POST_IMM;
907 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
909 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
911 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
913 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
916 return ARM::t2LDR_POST;
919 return ARM::t2STR_POST;
920 default: llvm_unreachable("Unhandled opcode!");
924 /// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
925 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
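///
/// Illustrative examples (hypothetical registers):
///   add r1, r1, #4; ldr r0, [r1]     =>  ldr r0, [r1, #4]!   (pre-indexed)
///   ldr r0, [r1];   add r1, r1, #4   =>  ldr r0, [r1], #4    (post-indexed)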
926 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
927 MachineBasicBlock::iterator MBBI,
928 const TargetInstrInfo *TII,
930 MachineBasicBlock::iterator &I) {
931 MachineInstr *MI = MBBI;
932 unsigned Base = MI->getOperand(1).getReg();
933 bool BaseKill = MI->getOperand(1).isKill();
934 unsigned Bytes = getLSMultipleTransferSize(MI);
935 int Opcode = MI->getOpcode();
936 DebugLoc dl = MI->getDebugLoc();
937 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
938 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
939 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
940 if (isi32Load(Opcode) || isi32Store(Opcode))
941 if (MI->getOperand(2).getImm() != 0)
943 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
946 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
947 // Can't do the merge if the destination register is the same as the would-be
948 // writeback register.
949 if (MI->getOperand(0).getReg() == Base)
952 unsigned PredReg = 0;
953 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
954 bool DoMerge = false;
955 ARM_AM::AddrOpc AddSub = ARM_AM::add;
957 // AM2 - 12 bits, thumb2 - 8 bits.
958 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
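// E.g. (illustrative): an ARM-mode LDR/STR can absorb a matching add/sub of
// up to 4095 bytes and a Thumb2 one up to 255 bytes, while the VFP VLDR/VSTR
// forms get Limit == 0 (no cap checked here) because they are rewritten to
// the updating load/store-multiple forms instead.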
960 // Try merging with the previous instruction.
961 MachineBasicBlock::iterator BeginMBBI = MBB.begin();
962 if (MBBI != BeginMBBI) {
963 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
964 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
966 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
968 AddSub = ARM_AM::sub;
970 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
974 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
979 // Try merging with the next instruction.
980 MachineBasicBlock::iterator EndMBBI = MBB.end();
981 if (!DoMerge && MBBI != EndMBBI) {
982 MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
983 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
986 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
988 AddSub = ARM_AM::sub;
989 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
993 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
1006 // VLDM[SD]_UPD, VSTM[SD]_UPD
1007 // (There are no base-updating versions of VLDR/VSTR instructions, but the
1008 //  updating load/store-multiple instructions can be used with only one
1009 //  register.)
1010 MachineOperand &MO = MI->getOperand(0);
1011 BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
1012 .addReg(Base, getDefRegState(true)) // WB base register
1013 .addReg(Base, getKillRegState(isLd ? BaseKill : false))
1014 .addImm(Pred).addReg(PredReg)
1015 .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
1016 getKillRegState(MO.isKill())));
1019 // LDR_PRE, LDR_POST
1020 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
1021 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1022 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1023 .addReg(Base, RegState::Define)
1024 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1026 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1027 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1028 .addReg(Base, RegState::Define)
1029 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1032 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1033 // t2LDR_PRE, t2LDR_POST
1034 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
1035 .addReg(Base, RegState::Define)
1036 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1039 MachineOperand &MO = MI->getOperand(0);
1040 // FIXME: post-indexed stores use am2offset_imm, which still encodes
1041 // the vestigial zero-reg offset register. When that's fixed, this clause
1042 // can be removed entirely.
1043 if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
1044 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
1045 // STR_PRE, STR_POST
1046 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1047 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1048 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
1050 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
1051 // t2STR_PRE, t2STR_POST
1052 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
1053 .addReg(MO.getReg(), getKillRegState(MO.isKill()))
1054 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
1062 /// isMemoryOp - Returns true if instruction is a memory operation that this
1063 /// pass is capable of operating on.
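///
/// Roughly (a non-authoritative summary of the checks below): a load or store
/// with a single non-volatile memory operand, alignment of at least 4, no
/// undef value or base operand, and a plain register base qualifies.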
1064 static bool isMemoryOp(const MachineInstr *MI) {
1065 // When no memory operands are present, conservatively assume unaligned,
1066 // volatile, unfoldable.
1067 if (!MI->hasOneMemOperand())
1070 const MachineMemOperand *MMO = *MI->memoperands_begin();
1072 // Don't touch volatile memory accesses - we may be changing their order.
1073 if (MMO->isVolatile())
1076 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1077 // not.
1078 if (MMO->getAlignment() < 4)
1081 // str <undef> could probably be eliminated entirely, but for now we just want
1082 // to avoid making a mess of it.
1083 // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1084 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1085 MI->getOperand(0).isUndef())
1088 // Likewise don't mess with references to undefined addresses.
1089 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1090 MI->getOperand(1).isUndef())
1093 int Opcode = MI->getOpcode();
1098 return MI->getOperand(1).isReg();
1101 return MI->getOperand(1).isReg();
1108 return MI->getOperand(1).isReg();
1113 /// AdvanceRS - Advance register scavenger to just before the earliest memory
1114 /// op that is being merged.
1115 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1116 MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1117 unsigned Position = MemOps[0].Position;
1118 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1119 if (MemOps[i].Position < Position) {
1120 Position = MemOps[i].Position;
1121 Loc = MemOps[i].MBBI;
1125 if (Loc != MBB.begin())
1126 RS->forward(std::prev(Loc));
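/// getMemoryOpOffset - Return the byte offset encoded in a load / store's
/// immediate operand (a brief summary: the i12 forms return the raw
/// immediate, while AM3 and AM5 encodings are decoded and sign-adjusted
/// below).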
1129 static int getMemoryOpOffset(const MachineInstr *MI) {
1130 int Opcode = MI->getOpcode();
1131 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1132 unsigned NumOperands = MI->getDesc().getNumOperands();
1133 unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1135 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1136 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1137 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1138 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
1141 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1142 : ARM_AM::getAM5Offset(OffField) * 4;
1144 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1147 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1153 static void InsertLDR_STR(MachineBasicBlock &MBB,
1154 MachineBasicBlock::iterator &MBBI,
1155 int Offset, bool isDef,
1156 DebugLoc dl, unsigned NewOpc,
1157 unsigned Reg, bool RegDeadKill, bool RegUndef,
1158 unsigned BaseReg, bool BaseKill, bool BaseUndef,
1159 bool OffKill, bool OffUndef,
1160 ARMCC::CondCodes Pred, unsigned PredReg,
1161 const TargetInstrInfo *TII, bool isT2) {
1163 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1165 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1166 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1167 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1169 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1171 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1172 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1173 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1177 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1178 MachineBasicBlock::iterator &MBBI) {
1179 MachineInstr *MI = &*MBBI;
1180 unsigned Opcode = MI->getOpcode();
1181 if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1182 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1183 const MachineOperand &BaseOp = MI->getOperand(2);
1184 unsigned BaseReg = BaseOp.getReg();
1185 unsigned EvenReg = MI->getOperand(0).getReg();
1186 unsigned OddReg = MI->getOperand(1).getReg();
1187 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1188 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
1189 // ARM errata 602117: LDRD with base in list may result in incorrect base
1190 // register when interrupted or faulted.
1191 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1192 if (!Errata602117 &&
1193 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1196 MachineBasicBlock::iterator NewBBI = MBBI;
1197 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1198 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1199 bool EvenDeadKill = isLd ?
1200 MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1201 bool EvenUndef = MI->getOperand(0).isUndef();
1202 bool OddDeadKill = isLd ?
1203 MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1204 bool OddUndef = MI->getOperand(1).isUndef();
1205 bool BaseKill = BaseOp.isKill();
1206 bool BaseUndef = BaseOp.isUndef();
1207 bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1208 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1209 int OffImm = getMemoryOpOffset(MI);
1210 unsigned PredReg = 0;
1211 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1213 if (OddRegNum > EvenRegNum && OffImm == 0) {
1214 // Ascending register numbers and no offset. It's safe to change it to an
1215 // ldm or stm.
1216 unsigned NewOpc = (isLd)
1217 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1218 : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1220 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1221 .addReg(BaseReg, getKillRegState(BaseKill))
1222 .addImm(Pred).addReg(PredReg)
1223 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1224 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1227 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1228 .addReg(BaseReg, getKillRegState(BaseKill))
1229 .addImm(Pred).addReg(PredReg)
1231 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1233 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
1236 NewBBI = std::prev(MBBI);
1238 // Split into two instructions.
1239 unsigned NewOpc = (isLd)
1240 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1241 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1242 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1243 // so adjust and use t2LDRi12 here for that.
1244 unsigned NewOpc2 = (isLd)
1245 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1246 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1247 DebugLoc dl = MBBI->getDebugLoc();
1248 // If this is a load and the base register is killed, it may have been
1249 // re-defined by the load; make sure the first load does not clobber it.
1251 (BaseKill || OffKill) &&
1252 (TRI->regsOverlap(EvenReg, BaseReg))) {
1253 assert(!TRI->regsOverlap(OddReg, BaseReg));
1254 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1255 OddReg, OddDeadKill, false,
1256 BaseReg, false, BaseUndef, false, OffUndef,
1257 Pred, PredReg, TII, isT2);
1258 NewBBI = std::prev(MBBI);
1259 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1260 EvenReg, EvenDeadKill, false,
1261 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1262 Pred, PredReg, TII, isT2);
1264 if (OddReg == EvenReg && EvenDeadKill) {
1265 // If the two source operands are the same, the kill marker is
1266 // probably on the first one. e.g.
1267 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1268 EvenDeadKill = false;
1271 // Never kill the base register in the first instruction.
1272 if (EvenReg == BaseReg)
1273 EvenDeadKill = false;
1274 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1275 EvenReg, EvenDeadKill, EvenUndef,
1276 BaseReg, false, BaseUndef, false, OffUndef,
1277 Pred, PredReg, TII, isT2);
1278 NewBBI = std::prev(MBBI);
1279 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1280 OddReg, OddDeadKill, OddUndef,
1281 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1282 Pred, PredReg, TII, isT2);
1297 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1298 /// ops of the same base and incrementing offset into LDM / STM ops.
1299 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1300 unsigned NumMerges = 0;
1301 unsigned NumMemOps = 0;
1303 unsigned CurrBase = 0;
1305 unsigned CurrSize = 0;
1306 ARMCC::CondCodes CurrPred = ARMCC::AL;
1307 unsigned CurrPredReg = 0;
1308 unsigned Position = 0;
1309 SmallVector<MachineBasicBlock::iterator,4> Merges;
1311 RS->enterBasicBlock(&MBB);
1312 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1314 if (FixInvalidRegPairOp(MBB, MBBI))
1317 bool Advance = false;
1318 bool TryMerge = false;
1319 bool Clobber = false;
1321 bool isMemOp = isMemoryOp(MBBI);
1323 int Opcode = MBBI->getOpcode();
1324 unsigned Size = getLSMultipleTransferSize(MBBI);
1325 const MachineOperand &MO = MBBI->getOperand(0);
1326 unsigned Reg = MO.getReg();
1327 bool isKill = MO.isDef() ? false : MO.isKill();
1328 unsigned Base = MBBI->getOperand(1).getReg();
1329 unsigned PredReg = 0;
1330 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1331 int Offset = getMemoryOpOffset(MBBI);
1334 // r5 := ldr [r5, #4]
1335 // r6 := ldr [r5, #8]
1337 // The second ldr has effectively broken the chain even though it
1338 // looks like the later ldr(s) use the same base register. Try to
1339 // merge the ldr's so far, including this one. But don't try to
1340 // combine the following ldr(s).
1341 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1344 // r4 := ldr [r0, #8]
1345 // r4 := ldr [r0, #4]
1347 // The optimization may reorder the second ldr in front of the first
1348 // ldr, which violates the write-after-write (WAW) dependence. The same
1349 // applies to str. Try to merge the instruction(s) already in MemOps.
1350 bool Overlap = false;
1351 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
1352 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
1358 if (CurrBase == 0 && !Clobber) {
1359 // Start of a new chain.
1364 CurrPredReg = PredReg;
1365 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1368 } else if (!Overlap) {
1374 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1375 // No need to match PredReg.
1376 // Continue adding to the queue.
1377 if (Offset > MemOps.back().Offset) {
1378 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1383 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1385 if (Offset < I->Offset) {
1386 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1391 } else if (Offset == I->Offset) {
1392 // Collision! This can't be merged!
1401 if (MBBI->isDebugValue()) {
1404 // Reach the end of the block, try merging the memory instructions.
1406 } else if (Advance) {
1410 // Reach the end of the block, try merging the memory instructions.
1416 if (NumMemOps > 1) {
1417 // Try to find a free register to use as a new base in case it's needed.
1418 // First advance to the instruction just before the start of the chain.
1419 AdvanceRS(MBB, MemOps);
1420 // Find a scratch register.
1421 unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
1422 // Process the load / store instructions.
1423 RS->forward(std::prev(MBBI));
1427 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1428 CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1430 // Try folding preceding/trailing base inc/dec into the generated
1431 // LDM/STM ops.
1432 for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1433 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1435 NumMerges += Merges.size();
1437 // Try folding preceding/trailing base inc/dec into those load/store
1438 // that were not merged to form LDM/STM ops.
1439 for (unsigned i = 0; i != NumMemOps; ++i)
1440 if (!MemOps[i].Merged)
1441 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1444 // RS may be pointing to an instruction that's deleted.
1445 RS->skipTo(std::prev(MBBI));
1446 } else if (NumMemOps == 1) {
1447 // Try folding preceding/trailing base inc/dec into the single
1448 // load / store.
1449 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1451 RS->forward(std::prev(MBBI));
1458 CurrPred = ARMCC::AL;
1465 // If iterator hasn't been advanced and this is not a memory op, skip it.
1466 // It can't start a new chain anyway.
1467 if (!Advance && !isMemOp && MBBI != E) {
1473 return NumMerges > 0;
1476 /// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
1477 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1478 /// directly restores the value of LR into pc.
1479 /// ldmfd sp!, {..., lr}
1480 /// bx lr
1481 /// or
1482 /// ldmfd sp!, {..., lr}
1483 /// mov pc, lr
1484 /// =>
1485 /// ldmfd sp!, {..., pc}
1486 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1487 if (MBB.empty()) return false;
1489 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1490 if (MBBI != MBB.begin() &&
1491 (MBBI->getOpcode() == ARM::BX_RET ||
1492 MBBI->getOpcode() == ARM::tBX_RET ||
1493 MBBI->getOpcode() == ARM::MOVPCLR)) {
1494 MachineInstr *PrevMI = std::prev(MBBI);
1495 unsigned Opcode = PrevMI->getOpcode();
1496 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1497 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1498 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1499 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1500 if (MO.getReg() != ARM::LR)
1502 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1503 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1504 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1505 PrevMI->setDesc(TII->get(NewOpc));
1507 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
1515 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1516 const TargetMachine &TM = Fn.getTarget();
1517 AFI = Fn.getInfo<ARMFunctionInfo>();
1518 TII = TM.getInstrInfo();
1519 TRI = TM.getRegisterInfo();
1520 STI = &TM.getSubtarget<ARMSubtarget>();
1521 RS = new RegScavenger();
1522 isThumb2 = AFI->isThumb2Function();
1524 bool Modified = false;
1525 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1527 MachineBasicBlock &MBB = *MFI;
1528 Modified |= LoadStoreMultipleOpti(MBB);
1529 if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1530 Modified |= MergeReturnIntoLDM(MBB);
1538 /// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
1539 /// loads / stores from consecutive locations closer together to make it more
1540 /// likely they will be combined later.
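///
/// Illustrative example (the virtual register names are hypothetical): moving
///   %vreg1 = LDRi12 %vreg0, 0   ...   %vreg2 = LDRi12 %vreg0, 4
/// next to each other gives CanFormLdStDWord below (or the post-RA pass) a
/// much better chance of turning them into an LDRD or a load multiple.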
1543 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1545 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1547 const DataLayout *TD;
1548 const TargetInstrInfo *TII;
1549 const TargetRegisterInfo *TRI;
1550 const ARMSubtarget *STI;
1551 MachineRegisterInfo *MRI;
1552 MachineFunction *MF;
1554 bool runOnMachineFunction(MachineFunction &Fn) override;
1556 const char *getPassName() const override {
1557 return "ARM pre- register allocation load / store optimization pass";
1561 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1562 unsigned &NewOpc, unsigned &EvenReg,
1563 unsigned &OddReg, unsigned &BaseReg,
1565 unsigned &PredReg, ARMCC::CondCodes &Pred,
1567 bool RescheduleOps(MachineBasicBlock *MBB,
1568 SmallVectorImpl<MachineInstr *> &Ops,
1569 unsigned Base, bool isLd,
1570 DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1571 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1573 char ARMPreAllocLoadStoreOpt::ID = 0;
1576 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1577 TD = Fn.getTarget().getDataLayout();
1578 TII = Fn.getTarget().getInstrInfo();
1579 TRI = Fn.getTarget().getRegisterInfo();
1580 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1581 MRI = &Fn.getRegInfo();
1584 bool Modified = false;
1585 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1587 Modified |= RescheduleLoadStoreInstrs(MFI);
1592 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1593 MachineBasicBlock::iterator I,
1594 MachineBasicBlock::iterator E,
1595 SmallPtrSet<MachineInstr*, 4> &MemOps,
1596 SmallSet<unsigned, 4> &MemRegs,
1597 const TargetRegisterInfo *TRI) {
1598 // Are there stores / loads / calls between them?
1599 // FIXME: This is overly conservative. We should make use of alias information
1600 // if possible.
1601 SmallSet<unsigned, 4> AddedRegPressure;
1603 if (I->isDebugValue() || MemOps.count(&*I))
1605 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1607 if (isLd && I->mayStore())
1612 // It's not safe to move the first 'str' down.
1615 // str r4, [r0, #+4]
1619 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1620 MachineOperand &MO = I->getOperand(j);
1623 unsigned Reg = MO.getReg();
1624 if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1626 if (Reg != Base && !MemRegs.count(Reg))
1627 AddedRegPressure.insert(Reg);
1631 // Estimate register pressure increase due to the transformation.
1632 if (MemRegs.size() <= 4)
1633 // OK if we are moving a small number of instructions.
1635 return AddedRegPressure.size() <= MemRegs.size() * 2;
1639 /// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
1640 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1641 MachineInstr *Op1) {
1642 assert(MI->memoperands_empty() && "expected a new machineinstr");
1643 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1644 + (Op1->memoperands_end() - Op1->memoperands_begin());
1646 MachineFunction *MF = MI->getParent()->getParent();
1647 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1648 MachineSDNode::mmo_iterator MemEnd =
1649 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1651 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1652 MI->setMemRefs(MemBegin, MemEnd);
1656 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1658 unsigned &NewOpc, unsigned &EvenReg,
1659 unsigned &OddReg, unsigned &BaseReg,
1660 int &Offset, unsigned &PredReg,
1661 ARMCC::CondCodes &Pred,
1663 // Make sure we're allowed to generate LDRD/STRD.
1664 if (!STI->hasV5TEOps())
1667 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1669 unsigned Opcode = Op0->getOpcode();
1670 if (Opcode == ARM::LDRi12)
1672 else if (Opcode == ARM::STRi12)
1674 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1675 NewOpc = ARM::t2LDRDi8;
1678 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1679 NewOpc = ARM::t2STRDi8;
1685 // Make sure the base address satisfies i64 ld / st alignment requirement.
1686 // At the moment, we ignore the memory operand's value.
1687 // If we want to use AliasAnalysis, we should check it accordingly.
1688 if (!Op0->hasOneMemOperand() ||
1689 (*Op0->memoperands_begin())->isVolatile())
1692 unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1693 const Function *Func = MF->getFunction();
1694 unsigned ReqAlign = STI->hasV6Ops()
1695 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1696 : 8; // Pre-v6 needs 8-byte alignment.
1697 if (Align < ReqAlign)
1700 // Then make sure the immediate offset fits.
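// (Rough guide, assuming Scale is 4 for the t2 double-word forms and 1 for
// the ARM AM3 form: t2 offsets must be multiples of 4 within (-1024, 1024),
// while the ARM form accepts a magnitude below 256.)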
1701 int OffImm = getMemoryOpOffset(Op0);
1703 int Limit = (1 << 8) * Scale;
1704 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1708 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1710 AddSub = ARM_AM::sub;
1713 int Limit = (1 << 8) * Scale;
1714 if (OffImm >= Limit || (OffImm & (Scale-1)))
1716 Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1718 EvenReg = Op0->getOperand(0).getReg();
1719 OddReg = Op1->getOperand(0).getReg();
1720 if (EvenReg == OddReg)
1722 BaseReg = Op0->getOperand(1).getReg();
1723 Pred = getInstrPredicate(Op0, PredReg);
1724 dl = Op0->getDebugLoc();
1728 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1729 SmallVectorImpl<MachineInstr *> &Ops,
1730 unsigned Base, bool isLd,
1731 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1732 bool RetVal = false;
1734 // Sort by offset (in reverse order).
1735 std::sort(Ops.begin(), Ops.end(),
1736 [](const MachineInstr *LHS, const MachineInstr *RHS) {
1737 int LOffset = getMemoryOpOffset(LHS);
1738 int ROffset = getMemoryOpOffset(RHS);
1739 assert(LHS == RHS || LOffset != ROffset);
1740 return LOffset > ROffset;
1743 // The loads / stores of the same base are in order. Scan them from first to
1744 // last and check for the following:
1745 // 1. Any def of base.
1746 // 2. Any gaps.
1747 while (Ops.size() > 1) {
1748 unsigned FirstLoc = ~0U;
1749 unsigned LastLoc = 0;
1750 MachineInstr *FirstOp = nullptr;
1751 MachineInstr *LastOp = nullptr;
1753 unsigned LastOpcode = 0;
1754 unsigned LastBytes = 0;
1755 unsigned NumMove = 0;
1756 for (int i = Ops.size() - 1; i >= 0; --i) {
1757 MachineInstr *Op = Ops[i];
1758 unsigned Loc = MI2LocMap[Op];
1759 if (Loc <= FirstLoc) {
1763 if (Loc >= LastLoc) {
1769 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
1770 if (LastOpcode && LSMOpcode != LastOpcode)
1773 int Offset = getMemoryOpOffset(Op);
1774 unsigned Bytes = getLSMultipleTransferSize(Op);
1776 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1779 LastOffset = Offset;
1781 LastOpcode = LSMOpcode;
1782 if (++NumMove == 8) // FIXME: Tune this limit.
1789 SmallPtrSet<MachineInstr*, 4> MemOps;
1790 SmallSet<unsigned, 4> MemRegs;
1791 for (int i = NumMove-1; i >= 0; --i) {
1792 MemOps.insert(Ops[i]);
1793 MemRegs.insert(Ops[i]->getOperand(0).getReg());
1796 // Be conservative: if the instructions are too far apart, don't
1797 // move them. We want to limit the increase of register pressure.
1798 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1800 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1801 MemOps, MemRegs, TRI);
1803 for (unsigned i = 0; i != NumMove; ++i)
1806 // This is the new location for the loads / stores.
1807 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1808 while (InsertPos != MBB->end()
1809 && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1812 // If we are moving a pair of loads / stores, see if it makes sense
1813 // to try to allocate a pair of registers that can form register pairs.
1814 MachineInstr *Op0 = Ops.back();
1815 MachineInstr *Op1 = Ops[Ops.size()-2];
1816 unsigned EvenReg = 0, OddReg = 0;
1817 unsigned BaseReg = 0, PredReg = 0;
1818 ARMCC::CondCodes Pred = ARMCC::AL;
1820 unsigned NewOpc = 0;
1823 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1824 EvenReg, OddReg, BaseReg,
1825 Offset, PredReg, Pred, isT2)) {
1829 const MCInstrDesc &MCID = TII->get(NewOpc);
1830 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
1831 MRI->constrainRegClass(EvenReg, TRC);
1832 MRI->constrainRegClass(OddReg, TRC);
1834 // Form the pair instruction.
1836 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1837 .addReg(EvenReg, RegState::Define)
1838 .addReg(OddReg, RegState::Define)
1840 // FIXME: We're converting from LDRi12 to an insn that still
1841 // uses addrmode2, so we need an explicit offset reg. It should
1842 // always be reg0 since we're transforming LDRi12s.
1845 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1846 concatenateMemOperands(MIB, Op0, Op1);
1847 DEBUG(dbgs() << "Formed " << *MIB << "\n");
1850 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1854 // FIXME: We're converting from LDRi12 to an insn that still
1855 // uses addrmode2, so we need an explicit offset reg. It should
1856 // always be reg0 since we're transforming STRi12s.
1859 MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1860 concatenateMemOperands(MIB, Op0, Op1);
1861 DEBUG(dbgs() << "Formed " << *MIB << "\n");
1867 // Add register allocation hints to form register pairs.
1868 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1869 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
1871 for (unsigned i = 0; i != NumMove; ++i) {
1872 MachineInstr *Op = Ops.back();
1874 MBB->splice(InsertPos, MBB, Op);
1878 NumLdStMoved += NumMove;
1888 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1889 bool RetVal = false;
1891 DenseMap<MachineInstr*, unsigned> MI2LocMap;
1892 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1893 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1894 SmallVector<unsigned, 4> LdBases;
1895 SmallVector<unsigned, 4> StBases;
1898 MachineBasicBlock::iterator MBBI = MBB->begin();
1899 MachineBasicBlock::iterator E = MBB->end();
1901 for (; MBBI != E; ++MBBI) {
1902 MachineInstr *MI = MBBI;
1903 if (MI->isCall() || MI->isTerminator()) {
1904 // Stop at barriers.
1909 if (!MI->isDebugValue())
1910 MI2LocMap[MI] = ++Loc;
1912 if (!isMemoryOp(MI))
1914 unsigned PredReg = 0;
1915 if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
1918 int Opc = MI->getOpcode();
1919 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1920 unsigned Base = MI->getOperand(1).getReg();
1921 int Offset = getMemoryOpOffset(MI);
1923 bool StopHere = false;
1925 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1926 Base2LdsMap.find(Base);
1927 if (BI != Base2LdsMap.end()) {
1928 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1929 if (Offset == getMemoryOpOffset(BI->second[i])) {
1935 BI->second.push_back(MI);
1937 Base2LdsMap[Base].push_back(MI);
1938 LdBases.push_back(Base);
1941 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1942 Base2StsMap.find(Base);
1943 if (BI != Base2StsMap.end()) {
1944 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1945 if (Offset == getMemoryOpOffset(BI->second[i])) {
1951 BI->second.push_back(MI);
1953 Base2StsMap[Base].push_back(MI);
1954 StBases.push_back(Base);
1959 // Found a duplicate (a base+offset combination that's seen earlier).
1966 // Re-schedule loads.
1967 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1968 unsigned Base = LdBases[i];
1969 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
1971 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1974 // Re-schedule stores.
1975 for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1976 unsigned Base = StBases[i];
1977 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
1979 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1983 Base2LdsMap.clear();
1984 Base2StsMap.clear();
1994 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1995 /// optimization pass.
1996 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1998 return new ARMPreAllocLoadStoreOpt();
1999 return new ARMLoadStoreOpt();