#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
static char ID;
ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+ const MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
const ARMSubtarget *STI;
const TargetLowering *TL;
ARMFunctionInfo *AFI;
- RegScavenger *RS;
+ LivePhysRegs LiveRegs;
+ RegisterClassInfo RegClassInfo;
+ MachineBasicBlock::const_iterator LiveRegPos;
+ bool LiveRegsValid;
+ bool RegClassInfoValid;
bool isThumb1, isThumb2;
bool runOnMachineFunction(MachineFunction &Fn) override;
}
private:
+  /// A set of load/store MachineInstrs with the same base register sorted by
+ /// offset.
struct MemOpQueueEntry {
- int Offset;
- unsigned Reg;
- bool isKill;
- unsigned Position;
- MachineBasicBlock::iterator MBBI;
- bool Merged;
- MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
- MachineBasicBlock::iterator i)
- : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
+ MachineInstr *MI;
+ int Offset; ///< Load/Store offset.
+ unsigned Position; ///< Position as counted from end of basic block.
+ MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
+ : MI(MI), Offset(Offset), Position(Position) {}
};
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
- typedef MemOpQueue::iterator MemOpQueueIter;
- void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
- const MemOpQueue &MemOps, unsigned DefReg,
- unsigned RangeBegin, unsigned RangeEnd);
+ /// A set of MachineInstrs that fulfill (nearly all) conditions to get
+ /// merged into a LDM/STM.
+ struct MergeCandidate {
+ /// List of instructions ordered by load/store offset.
+ SmallVector<MachineInstr*, 4> Instrs;
+ /// Index in Instrs of the instruction being latest in the schedule.
+ unsigned LatestMIIdx;
+ /// Index in Instrs of the instruction being earliest in the schedule.
+ unsigned EarliestMIIdx;
+ /// Index into the basic block where the merged instruction will be
+ /// inserted. (See MemOpQueueEntry.Position)
+ unsigned InsertPos;
+ };
+ BumpPtrAllocator Allocator;
+ SmallVector<const MergeCandidate*,4> Candidates;
+
+ void moveLiveRegsBefore(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator Before);
+ unsigned findFreeReg(const TargetRegisterClass &RegClass);
void UpdateBaseRegUses(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc dl, unsigned Base, unsigned WordOffset,
+ DebugLoc DL, unsigned Base, unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg);
- bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
- ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
- DebugLoc dl,
- ArrayRef<std::pair<unsigned, bool> > Regs,
- ArrayRef<unsigned> ImpDefs);
- void MergeOpsUpdate(MachineBasicBlock &MBB,
- MemOpQueue &MemOps,
- unsigned memOpsBegin,
- unsigned memOpsEnd,
- unsigned insertAfter,
- int Offset,
- unsigned Base,
- bool BaseKill,
- unsigned Opcode,
- ARMCC::CondCodes Pred,
- unsigned PredReg,
- unsigned Scratch,
- DebugLoc dl,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
- void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
- unsigned Opcode, unsigned Size,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch, MemOpQueue &MemOps,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
- void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ MachineInstr *MergeOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset,
+ unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs);
+ void FormCandidates(const MemOpQueue &MemOps);
+ MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
- bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const TargetInstrInfo *TII,
- bool &Advance,
- MachineBasicBlock::iterator &I);
- bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- bool &Advance,
- MachineBasicBlock::iterator &I);
+ bool MergeBaseUpdateLoadStore(MachineInstr *MI);
+ bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
return Offset;
}
+static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
+ return MI.getOperand(1);
+}
+
+static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
+ return MI.getOperand(0);
+}
+
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
switch (Opcode) {
default: llvm_unreachable("Unhandled opcode!");
return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}
+static bool isLoadSingle(unsigned Opc) {
+ return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+}
+
static unsigned getImmScale(unsigned Opc) {
switch (Opc) {
default: llvm_unreachable("Unhandled opcode!");
}
}
+static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDRi12:
+ case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRi12:
+ case ARM::t2STRi8:
+ case ARM::t2STRi12:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ return 4;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ return 8;
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
+ case ARM::t2LDMIA:
+ case ARM::t2LDMDB:
+ case ARM::t2STMIA:
+ case ARM::t2STMDB:
+ case ARM::VLDMSIA:
+ case ARM::VSTMSIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
+ case ARM::VLDMDIA:
+ case ARM::VSTMDIA:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
+ }
+}
+
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc dl, unsigned Base,
+ DebugLoc DL, unsigned Base,
unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg) {
assert(isThumb1 && "Can only update base register uses for Thumb1!");
Offset = MO.getImm() - WordOffset * getImmScale(Opc);
// If storing the base register, it needs to be reset first.
- unsigned InstrSrcReg = MBBI->getOperand(0).getReg();
+ unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
MO.setImm(Offset);
if (InsertSub) {
// An instruction above couldn't be updated, so insert a sub.
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
.addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
return;
}
// See PR21029.
if (MBBI != MBB.end()) --MBBI;
AddDefaultT1CC(
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
.addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
}
}
+/// Return the first register of class \p RegClass that is not in \p LiveRegs.
+unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
+ if (!RegClassInfoValid) {
+ RegClassInfo.runOnMachineFunction(*MF);
+ RegClassInfoValid = true;
+ }
+
+ for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
+ if (!LiveRegs.contains(Reg))
+ return Reg;
+ return 0;
+}
+
+/// Compute live registers just before instruction \p Before (in normal schedule
+/// direction). Computes backwards so multiple queries in the same block must
+/// come in reverse order.
+void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator Before) {
+ // Initialize if we never queried in this block.
+ if (!LiveRegsValid) {
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveOuts(&MBB, true);
+ LiveRegPos = MBB.end();
+ LiveRegsValid = true;
+ }
+ // Move backward just before the "Before" position.
+ while (LiveRegPos != Before) {
+ --LiveRegPos;
+ LiveRegs.stepBackward(*LiveRegPos);
+ }
+}
+
+static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
+ unsigned Reg) {
+ for (const std::pair<unsigned, bool> &R : Regs)
+ if (R.first == Reg)
+ return true;
+ return false;
+}
+
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// true if the transformation is done.
-bool
+MachineInstr *
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- int Offset, unsigned Base, bool BaseKill,
- unsigned Opcode, ARMCC::CondCodes Pred,
- unsigned PredReg, unsigned Scratch, DebugLoc dl,
- ArrayRef<std::pair<unsigned, bool> > Regs,
- ArrayRef<unsigned> ImpDefs) {
- // Only a single register to load / store. Don't bother.
+ MachineBasicBlock::iterator InsertBefore, int Offset,
+ unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs) {
unsigned NumRegs = Regs.size();
- if (NumRegs <= 1)
- return false;
+ assert(NumRegs > 1);
// For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
// Compute liveness information for that register to make the decision.
bool SafeToClobberCPSR = !isThumb1 ||
- (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
+ (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
MachineBasicBlock::LQR_Dead);
bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
// Exception: If the base register is in the input reglist, Thumb1 LDM is
// non-writeback.
// It's also not possible to merge an STR of the base register in Thumb1.
- if (isThumb1)
- for (const std::pair<unsigned, bool> &R : Regs)
- if (Base == R.first) {
- assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
- if (Opcode == ARM::tLDRi) {
- Writeback = false;
- break;
- } else if (Opcode == ARM::tSTRi) {
- return false;
- }
- }
+ if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
+ assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
+ if (Opcode == ARM::tLDRi) {
+ Writeback = false;
+ } else if (Opcode == ARM::tSTRi) {
+ return nullptr;
+ }
+ }
ARM_AM::AMSubMode Mode = ARM_AM::ia;
// VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
} else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
// Check if this is a supported opcode before inserting instructions to
// calculate a new base register.
- if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
+ if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
// If starting offset isn't zero, insert a MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
if (NumRegs <= 2)
- return false;
+ return nullptr;
// On Thumb1, it's not worth materializing a new base register without
// clobbering the CPSR (i.e. not using ADDS/SUBS).
if (!SafeToClobberCPSR)
- return false;
+ return nullptr;
unsigned NewBase;
if (isi32Load(Opcode)) {
// use as the new base.
NewBase = Regs[NumRegs-1].first;
} else {
- // Use the scratch register to use as a new base.
- NewBase = Scratch;
+ // Find a free register that we can use as scratch register.
+ moveLiveRegsBefore(MBB, InsertBefore);
+ // The merged instruction does not exist yet but will use several Regs if
+ // it is a Store.
+ if (!isLoadSingle(Opcode))
+ for (const std::pair<unsigned, bool> &R : Regs)
+ LiveRegs.addReg(R.first);
+
+ NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
if (NewBase == 0)
- return false;
+ return nullptr;
}
int BaseOpc =
if (!TL->isLegalAddImmediate(Offset))
// FIXME: Try add with register operand?
- return false; // Probably not worth it then.
+ return nullptr; // Probably not worth it then.
+
+ // We can only append a kill flag to the add/sub input if the value is not
+ // used in the register list of the stm as well.
+ bool KillOldBase = BaseKill &&
+ (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
if (isThumb1) {
// Thumb1: depending on immediate size, use either
!STI->hasV6Ops()) {
// thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
if (Pred != ARMCC::AL)
- return false;
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
- .addReg(Base, getKillRegState(BaseKill));
+ return nullptr;
+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase));
} else
- BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
- .addReg(Base, getKillRegState(BaseKill))
+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase))
.addImm(Pred).addReg(PredReg);
- // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
+ // The following ADDS/SUBS becomes an update.
Base = NewBase;
- BaseKill = false;
+ KillOldBase = true;
}
if (BaseOpc == ARM::tADDrSPi) {
assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
.addImm(Pred).addReg(PredReg);
} else
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ AddDefaultT1CC(
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
.addImm(Pred).addReg(PredReg);
} else {
- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
.addImm(Pred).addReg(PredReg).addReg(0);
}
Base = NewBase;
BaseKill = true; // New base is always killed straight away.
}
- bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
- Opcode == ARM::VLDRD);
+ bool isDef = isLoadSingle(Opcode);
// Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
// base register writeback.
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
- if (!Opcode) return false;
+ if (!Opcode)
+ return nullptr;
// Check if a Thumb1 LDM/STM merge is safe. This is the case if:
// - There is no writeback (LDM of base register),
// It's safe to return here since the code to materialize a new base register
// above is also conditional on SafeToClobberCPSR.
if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
- return false;
+ return nullptr;
MachineInstrBuilder MIB;
// Update tLDMIA with writeback if necessary.
Opcode = ARM::tLDMIA_UPD;
- MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+ MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
// Thumb1: we might need to set base writeback when building the MI.
MIB.addReg(Base, getDefRegState(true))
// The base isn't dead after a merged instruction with writeback.
// Insert a sub instruction after the newly formed instruction to reset.
if (!BaseKill)
- UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
+ UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
} else {
// No writeback, simply build the MachineInstr.
- MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+ MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
MIB.addReg(Base, getKillRegState(BaseKill));
}
MIB.addImm(Pred).addReg(PredReg);
for (const std::pair<unsigned, bool> &R : Regs)
- MIB = MIB.addReg(R.first, getDefRegState(isDef)
- | getKillRegState(R.second));
-
- // Add implicit defs for super-registers.
- for (unsigned ImpDef : ImpDefs)
- MIB.addReg(ImpDef, RegState::ImplicitDefine);
-
- return true;
-}
-
-/// Find all instructions using a given imp-def within a range.
-///
-/// We are trying to combine a range of instructions, one of which (located at
-/// position RangeBegin) implicitly defines a register. The final LDM/STM will
-/// be placed at RangeEnd, and so any uses of this definition between RangeStart
-/// and RangeEnd must be modified to use an undefined value.
-///
-/// The live range continues until we find a second definition or one of the
-/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
-/// we must consider all uses and decide which are relevant in a second pass.
-void ARMLoadStoreOpt::findUsesOfImpDef(
- SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
- unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
- std::map<unsigned, MachineOperand *> Uses;
- unsigned LastLivePos = RangeEnd;
-
- // First we find all uses of this register with Position between RangeBegin
- // and RangeEnd, any or all of these could be uses of a definition at
- // RangeBegin. We also record the latest position a definition at RangeBegin
- // would be considered live.
- for (unsigned i = 0; i < MemOps.size(); ++i) {
- MachineInstr &MI = *MemOps[i].MBBI;
- unsigned MIPosition = MemOps[i].Position;
- if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
- continue;
-
- // If this instruction defines the register, then any later use will be of
- // that definition rather than ours.
- if (MI.definesRegister(DefReg))
- LastLivePos = std::min(LastLivePos, MIPosition);
+ MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
- MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
- if (!UseOp)
- continue;
-
- // If this instruction kills the register then (assuming liveness is
- // correct when we start) we don't need to think about anything after here.
- if (UseOp->isKill())
- LastLivePos = std::min(LastLivePos, MIPosition);
-
- Uses[MIPosition] = UseOp;
- }
-
- // Now we traverse the list of all uses, and append the ones that actually use
- // our definition to the requested list.
- for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
- E = Uses.end();
- I != E; ++I) {
- // List is sorted by position so once we've found one out of range there
- // will be no more to consider.
- if (I->first > LastLivePos)
- break;
- UsesOfImpDefs.push_back(I->second);
- }
+ return MIB.getInstr();
}
/// Call MergeOps and update MemOps and merges accordingly on success.
-void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
- MemOpQueue &memOps,
- unsigned memOpsBegin, unsigned memOpsEnd,
- unsigned insertAfter, int Offset,
- unsigned Base, bool BaseKill,
- unsigned Opcode,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch,
- DebugLoc dl,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
- // First calculate which of the registers should be killed by the merged
- // instruction.
- const unsigned insertPos = memOps[insertAfter].Position;
- SmallSet<unsigned, 4> KilledRegs;
- DenseMap<unsigned, unsigned> Killer;
- for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
- if (i == memOpsBegin) {
- i = memOpsEnd;
- if (i == e)
- break;
- }
- if (memOps[i].Position < insertPos && memOps[i].isKill) {
- unsigned Reg = memOps[i].Reg;
+MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
+ const MachineInstr *First = Cand.Instrs.front();
+ unsigned Opcode = First->getOpcode();
+ bool IsLoad = isLoadSingle(Opcode);
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ SmallVector<unsigned, 4> ImpDefs;
+ DenseSet<unsigned> KilledRegs;
+ // Determine list of registers and list of implicit super-register defs.
+ for (const MachineInstr *MI : Cand.Instrs) {
+ const MachineOperand &MO = getLoadStoreRegOp(*MI);
+ unsigned Reg = MO.getReg();
+ bool IsKill = MO.isKill();
+ if (IsKill)
KilledRegs.insert(Reg);
- Killer[Reg] = i;
+ Regs.push_back(std::make_pair(Reg, IsKill));
+
+ if (IsLoad) {
+      // Collect any implicit defs of super-registers; after merging we can't
+      // be sure anymore that we properly preserved these live ranges and must
+      // remove these implicit operands.
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ continue;
+ assert(MO.isImplicit());
+ unsigned DefReg = MO.getReg();
+
+ if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
+ continue;
+ // We can ignore cases where the super-reg is read and written.
+ if (MI->readsRegister(DefReg))
+ continue;
+ ImpDefs.push_back(DefReg);
+ }
}
}
- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0);
- if (TransferOp.isUse() && TransferOp.getReg() == Base)
- BaseKill = false;
+ // Attempt the merge.
+ typedef MachineBasicBlock::iterator iterator;
+ MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
+ iterator InsertBefore = std::next(iterator(LatestMI));
+ MachineBasicBlock &MBB = *LatestMI->getParent();
+ unsigned Offset = getMemoryOpOffset(First);
+ unsigned Base = getLoadStoreBaseOp(*First).getReg();
+ bool BaseKill = LatestMI->killsRegister(Base);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
+ DebugLoc DL = First->getDebugLoc();
+ MachineInstr *Merged = MergeOps(MBB, InsertBefore, Offset, Base, BaseKill,
+ Opcode, Pred, PredReg, DL, Regs);
+ if (!Merged)
+ return nullptr;
+
+ // Determine earliest instruction that will get removed. We then keep an
+  // iterator just above it so the following erases don't invalidate it.
+ iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
+ bool EarliestAtBegin = false;
+ if (EarliestI == MBB.begin()) {
+ EarliestAtBegin = true;
+ } else {
+ EarliestI = std::prev(EarliestI);
}
- SmallVector<std::pair<unsigned, bool>, 8> Regs;
- SmallVector<unsigned, 8> ImpDefs;
- SmallVector<MachineOperand *, 8> UsesOfImpDefs;
- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- unsigned Reg = memOps[i].Reg;
- // If we are inserting the merged operation after an operation that
- // uses the same register, make sure to transfer any kill flag.
- bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
- Regs.push_back(std::make_pair(Reg, isKill));
-
- // Collect any implicit defs of super-registers. They must be preserved.
- for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
- if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
- continue;
- unsigned DefReg = MO.getReg();
- if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
- ImpDefs.push_back(DefReg);
-
- // There may be other uses of the definition between this instruction and
- // the eventual LDM/STM position. These should be marked undef if the
- // merge takes place.
- findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
- insertPos);
+ // Remove instructions which have been merged.
+ for (MachineInstr *MI : Cand.Instrs)
+ MBB.erase(MI);
+
+ // Determine range between the earliest removed instruction and the new one.
+ if (EarliestAtBegin)
+ EarliestI = MBB.begin();
+ else
+ EarliestI = std::next(EarliestI);
+ auto FixupRange = make_range(EarliestI, iterator(Merged));
+
+ if (isLoadSingle(Opcode)) {
+ // If the previous loads defined a super-reg, then we have to mark earlier
+    // operands undef; replicate the super-reg def on the merged instruction.
+ for (MachineInstr &MI : FixupRange) {
+ for (unsigned &ImpDefReg : ImpDefs) {
+ for (MachineOperand &MO : MI.implicit_operands()) {
+ if (!MO.isReg() || MO.getReg() != ImpDefReg)
+ continue;
+ if (MO.readsReg())
+ MO.setIsUndef();
+ else if (MO.isDef())
+ ImpDefReg = 0;
+ }
+ }
}
- }
- // Try to do the merge.
- MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
- ++Loc;
- if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
- Pred, PredReg, Scratch, dl, Regs, ImpDefs))
- return;
-
- // Merge succeeded, update records.
- Merges.push_back(std::prev(Loc));
-
- // In gathering loads together, we may have moved the imp-def of a register
- // past one of its uses. This is OK, since we know better than the rest of
- // LLVM what's OK with ARM loads and stores; but we still have to adjust the
- // affected uses.
- for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
- E = UsesOfImpDefs.end();
- I != E; ++I)
- (*I)->setIsUndef();
-
- for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- // Remove kill flags from any memops that come before insertPos.
- if (Regs[i-memOpsBegin].second) {
- unsigned Reg = Regs[i-memOpsBegin].first;
- if (KilledRegs.count(Reg)) {
- unsigned j = Killer[Reg];
- int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
- assert(Idx >= 0 && "Cannot find killing operand");
- memOps[j].MBBI->getOperand(Idx).setIsKill(false);
- memOps[j].isKill = false;
+ MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
+ for (unsigned ImpDef : ImpDefs)
+ MIB.addReg(ImpDef, RegState::ImplicitDefine);
+ } else {
+ // Remove kill flags: We are possibly storing the values later now.
+ assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
+ for (MachineInstr &MI : FixupRange) {
+ for (MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.isKill())
+ continue;
+ if (KilledRegs.count(MO.getReg()))
+ MO.setIsKill(false);
}
- memOps[i].isKill = true;
}
- MBB.erase(memOps[i].MBBI);
- // Update this memop to refer to the merged instruction.
- // We may need to move kill flags again.
- memOps[i].Merged = true;
- memOps[i].MBBI = Merges.back();
- memOps[i].Position = insertPos;
+ assert(ImpDefs.empty());
}
- // Update memOps offsets, since they may have been modified by MergeOps.
- for (auto &MemOp : memOps) {
- MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
- }
+ return Merged;
}
-/// Merge a number of load / store instructions into one or more load / store
-/// multiple instructions.
-void
-ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
- unsigned Base, unsigned Opcode, unsigned Size,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned Scratch, MemOpQueue &MemOps,
- SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
+/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
+void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
+ const MachineInstr *FirstMI = MemOps[0].MI;
+ unsigned Opcode = FirstMI->getOpcode();
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
- int Offset = MemOps[SIndex].Offset;
- int SOffset = Offset;
- unsigned insertAfter = SIndex;
- MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
- DebugLoc dl = Loc->getDebugLoc();
- const MachineOperand &PMO = Loc->getOperand(0);
- unsigned PReg = PMO.getReg();
- unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
- unsigned Count = 1;
- unsigned Limit = ~0U;
- bool BaseKill = false;
+ unsigned Size = getLSMultipleTransferSize(FirstMI);
// vldm / vstm limit are 32 for S variants, 16 for D variants.
-
+ unsigned Limit;
switch (Opcode) {
- default: break;
+ default:
+ Limit = UINT_MAX;
+ break;
case ARM::VSTRS:
Limit = 32;
break;
break;
}
- for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
- int NewOffset = MemOps[i].Offset;
- const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
- unsigned Reg = MO.getReg();
- unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
- // Register numbers must be in ascending order. For VFP / NEON load and
- // store multiples, the registers must also be consecutive and within the
- // limit on the number of registers per instruction.
- if (Reg != ARM::SP &&
- NewOffset == Offset + (int)Size &&
- ((isNotVFP && RegNum > PRegNum) ||
- ((Count < Limit) && RegNum == PRegNum+1)) &&
- // On Swift we don't want vldm/vstm to start with a odd register num
- // because Q register unaligned vldm/vstm need more uops.
- (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
+ unsigned SIndex = 0;
+ unsigned EIndex = MemOps.size();
+ do {
+ // Look at the first instruction.
+ const MachineInstr *MI = MemOps[SIndex].MI;
+ int Offset = MemOps[SIndex].Offset;
+ const MachineOperand &PMO = getLoadStoreRegOp(*MI);
+ unsigned PReg = PMO.getReg();
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
+ unsigned Latest = SIndex;
+ unsigned Earliest = SIndex;
+ unsigned Count = 1;
+
+ // Merge additional instructions fulfilling LDM/STM constraints.
+ for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
+ int NewOffset = MemOps[I].Offset;
+ if (NewOffset != Offset + (int)Size)
+ break;
+ const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
+ unsigned Reg = MO.getReg();
+ if (Reg == ARM::SP)
+ break;
+ unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
+ // Register numbers must be in ascending order.
+ if (RegNum <= PRegNum)
+ break;
+ // For VFP / NEON load/store multiples, the registers must be consecutive
+ // and within the limit on the number of registers per instruction.
+ if (!isNotVFP && RegNum != PRegNum+1)
+ break;
+      // On Swift we don't want vldm/vstm to start with an odd register num
+ // because Q register unaligned vldm/vstm need more uops.
+ if (!isNotVFP && STI->isSwift() && Count == 1 && (PRegNum % 2) == 1)
+ break;
+
+ // Track MemOp with latest and earliest position (Positions are
+ // counted in reverse).
+ unsigned Position = MemOps[I].Position;
+ if (Position < MemOps[Latest].Position)
+ Latest = I;
+ else if (Position > MemOps[Earliest].Position)
+ Earliest = I;
+ // Prepare for next MemOp.
Offset += Size;
PRegNum = RegNum;
- ++Count;
- } else {
- // Can't merge this in. Try merge the earlier ones first.
- // We need to compute BaseKill here because the MemOps may have been
- // reordered.
- BaseKill = Loc->killsRegister(Base);
-
- MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
- BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
- MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
- MemOps, Merges);
- return;
}
- if (MemOps[i].Position > MemOps[insertAfter].Position) {
- insertAfter = i;
- Loc = MemOps[i].MBBI;
- }
- }
-
- BaseKill = Loc->killsRegister(Base);
- MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
- Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
+ // Form a candidate from the Ops collected so far.
+ MergeCandidate *Candidate = new(Allocator) MergeCandidate;
+ for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
+ Candidate->Instrs.push_back(MemOps[C].MI);
+ Candidate->LatestMIIdx = Latest - SIndex;
+ Candidate->EarliestMIIdx = Earliest - SIndex;
+ Candidate->InsertPos = MemOps[Latest].Position;
+ Candidates.push_back(Candidate);
+ // Continue after the chain.
+ SIndex += Count;
+ } while (SIndex < EIndex);
}
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
return CheckCPSRDef ? !definesCPSR(MI) : true;
}
-static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
- switch (MI->getOpcode()) {
- default: return 0;
- case ARM::LDRi12:
- case ARM::STRi12:
- case ARM::tLDRi:
- case ARM::tSTRi:
- case ARM::tLDRspi:
- case ARM::tSTRspi:
- case ARM::t2LDRi8:
- case ARM::t2LDRi12:
- case ARM::t2STRi8:
- case ARM::t2STRi12:
- case ARM::VLDRS:
- case ARM::VSTRS:
- return 4;
- case ARM::VLDRD:
- case ARM::VSTRD:
- return 8;
- case ARM::LDMIA:
- case ARM::LDMDA:
- case ARM::LDMDB:
- case ARM::LDMIB:
- case ARM::STMIA:
- case ARM::STMDA:
- case ARM::STMDB:
- case ARM::STMIB:
- case ARM::tLDMIA:
- case ARM::tLDMIA_UPD:
- case ARM::tSTMIA_UPD:
- case ARM::t2LDMIA:
- case ARM::t2LDMDB:
- case ARM::t2STMIA:
- case ARM::t2STMDB:
- case ARM::VLDMSIA:
- case ARM::VSTMSIA:
- return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
- case ARM::VLDMDIA:
- case ARM::VSTMDIA:
- return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
- }
-}
-
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
ARM_AM::AMSubMode Mode) {
switch (Opc) {
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
-bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- bool &Advance,
- MachineBasicBlock::iterator &I) {
+bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Thumb1 is already using updating loads/stores.
if (isThumb1) return false;
- MachineInstr *MI = MBBI;
- unsigned Base = MI->getOperand(0).getReg();
- bool BaseKill = MI->getOperand(0).isKill();
+ const MachineOperand &BaseOP = MI->getOperand(0);
+ unsigned Base = BaseOP.getReg();
+ bool BaseKill = BaseOP.isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
unsigned Opcode = MI->getOpcode();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
// Can't use an updating ld/st if the base register is also a dest
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
// Try merging with the previous instruction.
+ MachineBasicBlock &MBB = *MI->getParent();
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ MachineBasicBlock::iterator MBBI(MI);
if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
DoMerge = true;
}
- if (DoMerge) {
- if (NextMBBI == I) {
- Advance = true;
- ++I;
- }
+ if (DoMerge)
MBB.erase(NextMBBI);
- }
}
if (!DoMerge)
return false;
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);
/// Fold proceeding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
-bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const TargetInstrInfo *TII,
- bool &Advance,
- MachineBasicBlock::iterator &I) {
+bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// Thumb1 doesn't have updating LDR/STR.
// FIXME: Use LDM/STM with single register instead.
if (isThumb1) return false;
- MachineInstr *MI = MBBI;
- unsigned Base = MI->getOperand(1).getReg();
- bool BaseKill = MI->getOperand(1).isKill();
+ unsigned Base = getLoadStoreBaseOp(*MI).getReg();
+ bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned Opcode = MI->getOpcode();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc DL = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
return false;
- bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
+ bool isLd = isLoadSingle(Opcode);
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (MI->getOperand(0).getReg() == Base)
unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
// Try merging with the previous instruction.
+ MachineBasicBlock &MBB = *MI->getParent();
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ MachineBasicBlock::iterator MBBI(MI);
if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
}
if (DoMerge) {
NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
- if (NextMBBI == I) {
- Advance = true;
- ++I;
- }
MBB.erase(NextMBBI);
}
}
// updating load/store-multiple instructions can be used with only one
// register.)
MachineOperand &MO = MI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
.addReg(Base, getKillRegState(isLd ? BaseKill : false))
.addImm(Pred).addReg(PredReg)
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
}
} else {
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2LDR_PRE, t2LDR_POST
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
}
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2STR_PRE, t2STR_POST
- BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
}
return false;
}
-/// Advance register scavenger to just before the earliest memory op that is
-/// being merged.
-void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
- MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
- unsigned Position = MemOps[0].Position;
- for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
- if (MemOps[i].Position < Position) {
- Position = MemOps[i].Position;
- Loc = MemOps[i].MBBI;
- }
- }
-
- if (Loc != MBB.begin())
- RS->forward(std::prev(Loc));
-}
-
static void InsertLDR_STR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
int Offset, bool isDef,
- DebugLoc dl, unsigned NewOpc,
+ DebugLoc DL, unsigned NewOpc,
unsigned Reg, bool RegDeadKill, bool RegUndef,
unsigned BaseReg, bool BaseKill, bool BaseUndef,
bool OffKill, bool OffUndef,
if (!Errata602117 && !NonConsecutiveRegs)
return false;
- MachineBasicBlock::iterator NewBBI = MBBI;
bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
bool EvenDeadKill = isLd ?
getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
++NumSTRD2STM;
}
- NewBBI = std::prev(MBBI);
} else {
// Split into two instructions.
unsigned NewOpc = (isLd)
OddReg, OddDeadKill, false,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
- NewBBI = std::prev(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
- NewBBI = std::prev(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
OddReg, OddDeadKill, OddUndef,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
++NumSTRD2STR;
}
- MBB.erase(MI);
- MBBI = NewBBI;
+ MBBI = MBB.erase(MBBI);
return true;
}
/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
- unsigned NumMerges = 0;
- unsigned NumMemOps = 0;
MemOpQueue MemOps;
unsigned CurrBase = 0;
unsigned CurrOpc = ~0u;
ARMCC::CondCodes CurrPred = ARMCC::AL;
unsigned CurrPredReg = 0;
unsigned Position = 0;
- SmallVector<MachineBasicBlock::iterator,4> Merges;
+ assert(Candidates.size() == 0);
+ LiveRegsValid = false;
- RS->enterBasicBlock(&MBB);
- MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- while (MBBI != E) {
+ for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
+ I = MBBI) {
+ // The instruction in front of the iterator is the one we look at.
+ MBBI = std::prev(I);
if (FixInvalidRegPairOp(MBB, MBBI))
continue;
+ ++Position;
- bool Advance = false;
- bool TryMerge = false;
-
- bool isMemOp = isMemoryOp(MBBI);
- if (isMemOp) {
+ if (isMemoryOp(MBBI)) {
unsigned Opcode = MBBI->getOpcode();
unsigned Size = getLSMultipleTransferSize(MBBI);
const MachineOperand &MO = MBBI->getOperand(0);
unsigned Reg = MO.getReg();
- bool isKill = MO.isDef() ? false : MO.isKill();
- unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
int Offset = getMemoryOpOffset(MBBI);
- // Watch out for:
- // r4 := ldr [r5]
- // r5 := ldr [r5, #4]
- // r6 := ldr [r5, #8]
- //
- // The second ldr has effectively broken the chain even though it
- // looks like the later ldr(s) use the same base register. Try to
- // merge the ldr's so far, including this one. But don't try to
- // combine the following ldr(s).
- bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg();
-
- // Watch out for:
- // r4 := ldr [r0, #8]
- // r4 := ldr [r0, #4]
- //
- // The optimization may reorder the second ldr in front of the first
- // ldr, which violates write after write(WAW) dependence. The same as
- // str. Try to merge inst(s) already in MemOps.
- bool Overlap = false;
- for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
- if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
- Overlap = true;
- break;
- }
- }
-
- if (CurrBase == 0 && !Clobber) {
+ if (CurrBase == 0) {
// Start of a new chain.
CurrBase = Base;
CurrOpc = Opcode;
CurrSize = Size;
CurrPred = Pred;
CurrPredReg = PredReg;
- MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
- ++NumMemOps;
- Advance = true;
- } else if (!Overlap) {
- if (Clobber) {
- TryMerge = true;
- Advance = true;
+ MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
+ }
+ // Note: No need to match PredReg in the next if.
+ if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
+ // Watch out for:
+ // r4 := ldr [r0, #8]
+ // r4 := ldr [r0, #4]
+ // or
+ // r0 := ldr [r0]
+ // If a load overrides the base register or a register loaded by
+ // another load in our chain, we cannot take this instruction.
+ bool Overlap = false;
+ if (isLoadSingle(Opcode)) {
+ Overlap = (Base == Reg);
+ if (!Overlap) {
+ for (const MemOpQueueEntry &E : MemOps) {
+ if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
+ Overlap = true;
+ break;
+ }
+ }
+ }
}
- if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
- // No need to match PredReg.
- // Continue adding to the queue.
+ if (!Overlap) {
+ // Check offset and sort memory operation into the current chain.
if (Offset > MemOps.back().Offset) {
- MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
- Position, MBBI));
- ++NumMemOps;
- Advance = true;
+ MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
} else {
- for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
- I != E; ++I) {
- if (Offset < I->Offset) {
- MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
- Position, MBBI));
- ++NumMemOps;
- Advance = true;
+ MemOpQueue::iterator MI, ME;
+ for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
+ if (Offset < MI->Offset) {
+ // Found a place to insert.
break;
- } else if (Offset == I->Offset) {
- // Collision! This can't be merged!
+ }
+ if (Offset == MI->Offset) {
+ // Collision, abort.
+ MI = ME;
break;
}
}
+ if (MI != MemOps.end()) {
+ MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
+ continue;
+ }
}
}
}
- }
-
- if (MBBI->isDebugValue()) {
- ++MBBI;
- if (MBBI == E)
- // Reach the end of the block, try merging the memory instructions.
- TryMerge = true;
- } else if (Advance) {
- ++Position;
- ++MBBI;
- if (MBBI == E)
- // Reach the end of the block, try merging the memory instructions.
- TryMerge = true;
- } else {
- TryMerge = true;
- }
- if (TryMerge) {
- if (NumMemOps > 1) {
- // Try to find a free register to use as a new base in case it's needed.
- // First advance to the instruction just before the start of the chain.
- AdvanceRS(MBB, MemOps);
-
- // Find a scratch register.
- unsigned Scratch =
- RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
-
- // Process the load / store instructions.
- RS->forward(std::prev(MBBI));
-
- // Merge ops.
- Merges.clear();
- MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
- CurrPred, CurrPredReg, Scratch, MemOps, Merges);
-
- // Try folding preceding/trailing base inc/dec into the generated
- // LDM/STM ops.
- for (unsigned i = 0, e = Merges.size(); i < e; ++i)
- if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
- ++NumMerges;
- NumMerges += Merges.size();
-
- // Try folding preceding/trailing base inc/dec into those load/store
- // that were not merged to form LDM/STM ops.
- for (unsigned i = 0; i != NumMemOps; ++i)
- if (!MemOps[i].Merged)
- if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
- ++NumMerges;
-
- // RS may be pointing to an instruction that's deleted.
- RS->skipTo(std::prev(MBBI));
- } else if (NumMemOps == 1) {
- // Try folding preceding/trailing base inc/dec into the single
- // load/store.
- if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
- ++NumMerges;
- RS->forward(std::prev(MBBI));
- }
- }
+ // Don't advance the iterator; the op will start a new chain next.
+ MBBI = I;
+ --Position;
+ // Fallthrough to look into existing chain.
+ } else if (MBBI->isDebugValue())
+ continue;
+ // If we are here then the chain is broken; extract candidates for a merge.
+ if (MemOps.size() > 0) {
+ FormCandidates(MemOps);
+ // Reset for the next chain.
CurrBase = 0;
CurrOpc = ~0u;
CurrSize = 0;
CurrPred = ARMCC::AL;
CurrPredReg = 0;
- if (NumMemOps) {
- MemOps.clear();
- NumMemOps = 0;
- }
+ MemOps.clear();
+ }
+ }
+ if (MemOps.size() > 0)
+ FormCandidates(MemOps);
- // If iterator hasn't been advanced and this is not a memory op, skip it.
- // It can't start a new chain anyway.
- if (!Advance && !isMemOp && MBBI != E) {
- ++Position;
- ++MBBI;
+ // Sort candidates so they get processed from the end to the beginning of
+ // the basic block later; this is necessary for liveness calculation.
+ auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
+ return M0->InsertPos < M1->InsertPos;
+ };
+ std::sort(Candidates.begin(), Candidates.end(), LessThan);
+
+ // Go through list of candidates and merge.
+ bool Changed = false;
+ for (const MergeCandidate *Candidate : Candidates) {
+ if (Candidate->Instrs.size() > 1) {
+ MachineInstr *Merged = MergeOpsUpdate(*Candidate);
+ // Merge preceding/trailing base inc/dec into the merged op.
+ if (Merged) {
+ MergeBaseUpdateLSMultiple(Merged);
+ Changed = true;
+ } else {
+ for (MachineInstr *MI : Candidate->Instrs) {
+ if (MergeBaseUpdateLoadStore(MI))
+ Changed = true;
+ }
}
+ } else {
+ assert(Candidate->Instrs.size() == 1);
+ if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
+ Changed = true;
}
}
- return NumMerges > 0;
+ Candidates.clear();
+
+ return Changed;
}
/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
}
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ MF = &Fn;
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
TL = STI->getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
- RS = new RegScavenger();
+ MRI = &Fn.getRegInfo();
+ RegClassInfoValid = false;
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
Modified |= MergeReturnIntoLDM(MBB);
}
- delete RS;
+ Allocator.Reset();
return Modified;
}
continue;
int Opc = MI->getOpcode();
- bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
+ bool isLd = isLoadSingle(Opc);
unsigned Base = MI->getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);