/// Index into the basic block where the merged instruction will be
/// inserted. (See MemOpQueueEntry.Position)
unsigned InsertPos;
- /// Whether the instructions can be merged into a ldm/stm instruction.
- bool CanMergeToLSMulti;
- /// Whether the instructions can be merged into a ldrd/strd instruction.
- bool CanMergeToLSDouble;
};
BumpPtrAllocator Allocator;
SmallVector<const MergeCandidate*,4> Candidates;
MachineBasicBlock::iterator MBBI,
DebugLoc DL, unsigned Base, unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg);
- MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
- MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
+ MachineInstr *MergeOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset,
+ unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs);
void FormCandidates(const MemOpQueue &MemOps);
MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns the
/// newly created instruction, or nullptr if the transformation is not done.
-MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
+MachineInstr *
+ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset,
+ unsigned Base, bool BaseKill, unsigned Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
+ ArrayRef<std::pair<unsigned, bool>> Regs) {
unsigned NumRegs = Regs.size();
assert(NumRegs > 1);
return MIB.getInstr();
}
-MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
- bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
- DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
- bool IsLoad = isi32Load(Opcode);
- assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
- unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
-
- assert(Regs.size() == 2);
- MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
- TII->get(LoadStoreOpcode));
- if (IsLoad) {
- MIB.addReg(Regs[0].first, RegState::Define)
- .addReg(Regs[1].first, RegState::Define);
- } else {
- MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
- .addReg(Regs[1].first, getKillRegState(Regs[1].second));
- }
- MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
- return MIB.getInstr();
-}
-
/// Call MergeOps and update MemOps and merges accordingly on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
const MachineInstr *First = Cand.Instrs.front();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
DebugLoc DL = First->getDebugLoc();
- MachineInstr *Merged = nullptr;
- if (Cand.CanMergeToLSDouble)
- Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
- Opcode, Pred, PredReg, DL, Regs);
- if (!Merged && Cand.CanMergeToLSMulti)
- Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
+ MachineInstr *Merged = MergeOps(MBB, InsertBefore, Offset, Base, BaseKill,
Opcode, Pred, PredReg, DL, Regs);
if (!Merged)
return nullptr;
return Merged;
}
-static bool isValidLSDoubleOffset(int Offset) {
- unsigned Value = abs(Offset);
- // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
- // multiplied by 4.
- return (Value % 4) == 0 && Value < 1024;
-}
-
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
const MachineInstr *FirstMI = MemOps[0].MI;
unsigned Latest = SIndex;
unsigned Earliest = SIndex;
unsigned Count = 1;
- bool CanMergeToLSDouble =
- STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
- // ARM errata 602117: LDRD with base in list may result in incorrect base
- // register when interrupted or faulted.
- if (STI->isCortexM3() && isi32Load(Opcode) &&
- PReg == getLoadStoreBaseOp(*MI).getReg())
- CanMergeToLSDouble = false;
-
- bool CanMergeToLSMulti = true;
- // On swift vldm/vstm starting with an odd register number as that needs
- // more uops than single vldrs.
- if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
- CanMergeToLSMulti = false;
-
- // Merge following instructions where possible.
+
+ // Merge additional instructions fulfilling LDM/STM constraints.
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
int NewOffset = MemOps[I].Offset;
if (NewOffset != Offset + (int)Size)
break;
const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
unsigned Reg = MO.getReg();
+ if (Reg == ARM::SP)
+ break;
unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
-
- // See if the current load/store may be part of a multi load/store.
- bool PartOfLSMulti = CanMergeToLSMulti;
- if (PartOfLSMulti) {
- // Cannot load from SP
- if (Reg == ARM::SP)
- PartOfLSMulti = false;
- // Register numbers must be in ascending order.
- else if (RegNum <= PRegNum)
- PartOfLSMulti = false;
- // For VFP / NEON load/store multiples, the registers must be
- // consecutive and within the limit on the number of registers per
- // instruction.
- else if (!isNotVFP && RegNum != PRegNum+1)
- PartOfLSMulti = false;
- }
- // See if the current load/store may be part of a double load/store.
- bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
-
- if (!PartOfLSMulti && !PartOfLSDouble)
+ // Register numbers must be in ascending order.
+ if (RegNum <= PRegNum)
break;
- CanMergeToLSMulti &= PartOfLSMulti;
- CanMergeToLSDouble &= PartOfLSDouble;
+ // For VFP / NEON load/store multiples, the registers must be consecutive
+ // and within the limit on the number of registers per instruction.
+ if (!isNotVFP && RegNum != PRegNum+1)
+ break;
+ // On Swift we don't want vldm/vstm to start with an odd register number
+ // because vldm/vstm not aligned to a Q register need more uops.
+ if (!isNotVFP && STI->isSwift() && Count == 1 && (PRegNum % 2) == 1)
+ break;
+
// Track MemOp with latest and earliest position (Positions are
// counted in reverse).
unsigned Position = MemOps[I].Position;
Candidate->LatestMIIdx = Latest - SIndex;
Candidate->EarliestMIIdx = Earliest - SIndex;
Candidate->InsertPos = MemOps[Latest].Position;
- if (Count == 1)
- CanMergeToLSMulti = CanMergeToLSDouble = false;
- Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
- Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
Candidates.push_back(Candidate);
// Continue after the chain.
SIndex += Count;
// Go through list of candidates and merge.
bool Changed = false;
for (const MergeCandidate *Candidate : Candidates) {
- if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
+ if (Candidate->Instrs.size() > 1) {
MachineInstr *Merged = MergeOpsUpdate(*Candidate);
// Merge preceding/trailing base inc/dec into the merged op.
if (Merged) {
+ MergeBaseUpdateLSMultiple(Merged);
Changed = true;
- unsigned Opcode = Merged->getOpcode();
- if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
- MergeBaseUpdateLSMultiple(Merged);
} else {
for (MachineInstr *MI : Candidate->Instrs) {
if (MergeBaseUpdateLoadStore(MI))