#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
-STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
-STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
+STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
MachineBasicBlock::iterator MBBI;
bool Merged;
MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
- : Offset(o), Position(p), MBBI(i), Merged(false) {};
+ : Offset(o), Position(p), MBBI(i), Merged(false) {}
};
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+ void MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &MemOps,
+ unsigned memOpsBegin,
+ unsigned memOpsEnd,
+ unsigned insertAfter,
+ int Offset,
+ unsigned Base,
+ bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges);
void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
int Opcode, unsigned Size,
ARMCC::CondCodes Pred, unsigned PredReg,
case ARM::t2STRi12:
NumSTMGened++;
return ARM::t2STM;
- case ARM::FLDS:
- NumFLDMGened++;
- return ARM::FLDMS;
- case ARM::FSTS:
- NumFSTMGened++;
- return ARM::FSTMS;
- case ARM::FLDD:
- NumFLDMGened++;
- return ARM::FLDMD;
- case ARM::FSTD:
- NumFSTMGened++;
- return ARM::FSTMD;
+ case ARM::VLDRS:
+ NumVLDMGened++;
+ return ARM::VLDMS;
+ case ARM::VSTRS:
+ NumVSTMGened++;
+ return ARM::VSTMS;
+ case ARM::VLDRD:
+ NumVLDMGened++;
+ return ARM::VLDMD;
+ case ARM::VSTRD:
+ NumVSTMGened++;
+ return ARM::VSTMD;
default: llvm_unreachable("Unhandled opcode!");
}
return 0;
BaseKill = true; // New base is always killed right its use.
}
- bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
- bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
+ bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
Opcode = getLoadStoreMultipleOpcode(Opcode);
MachineInstrBuilder MIB = (isAM4)
? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
return true;
}
+// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
+// success.
+void ARMLoadStoreOpt::
+MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &memOps,
+ unsigned memOpsBegin,
+ unsigned memOpsEnd,
+ unsigned insertAfter,
+ int Offset,
+ unsigned Base,
+ bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred,
+ unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+ // First calculate which of the registers should be killed by the merged
+ // instruction.
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ const unsigned insertPos = memOps[insertAfter].Position;
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ bool isKill = MO.isKill();
+
+ // If we are inserting the merged operation after an unmerged operation that
+ // uses the same register, make sure to transfer any kill flag.
+ for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
+ if (memOps[j].Position<insertPos) {
+ const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
+ if (MOJ.getReg() == Reg && MOJ.isKill())
+ isKill = true;
+ }
+
+ Regs.push_back(std::make_pair(Reg, isKill));
+ }
+
+ // Try to do the merge.
+ MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
+ Loc++;
+ if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
+ Pred, PredReg, Scratch, dl, Regs))
+ return;
+
+ // Merge succeeded, update records.
+ Merges.push_back(prior(Loc));
+ for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
+ // Remove kill flags from any unmerged memops that come before insertPos.
+ if (Regs[i-memOpsBegin].second)
+ for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
+ if (memOps[j].Position<insertPos) {
+ MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
+ if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
+ MOJ.setIsKill(false);
+ }
+ MBB.erase(memOps[i].MBBI);
+ memOps[i].Merged = true;
+ }
+}
+
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
int Offset = MemOps[SIndex].Offset;
int SOffset = Offset;
- unsigned Pos = MemOps[SIndex].Position;
+ unsigned insertAfter = SIndex;
MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
DebugLoc dl = Loc->getDebugLoc();
- unsigned PReg = Loc->getOperand(0).getReg();
- unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
- bool isKill = Loc->getOperand(0).isKill();
+ const MachineOperand &PMO = Loc->getOperand(0);
+ unsigned PReg = PMO.getReg();
+ unsigned PRegNum = PMO.isUndef() ? UINT_MAX
+ : ARMRegisterInfo::getRegisterNumbering(PReg);
- SmallVector<std::pair<unsigned,bool>, 8> Regs;
- Regs.push_back(std::make_pair(PReg, isKill));
for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
int NewOffset = MemOps[i].Offset;
- unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
- unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
- isKill = MemOps[i].MBBI->getOperand(0).isKill();
+ const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ unsigned RegNum = MO.isUndef() ? UINT_MAX
+ : ARMRegisterInfo::getRegisterNumbering(Reg);
// AM4 - register numbers in ascending order.
// AM5 - consecutive register numbers in ascending order.
- if (NewOffset == Offset + (int)Size &&
+ if (Reg != ARM::SP &&
+ NewOffset == Offset + (int)Size &&
((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
Offset += Size;
- Regs.push_back(std::make_pair(Reg, isKill));
PRegNum = RegNum;
} else {
// Can't merge this in. Try merge the earlier ones first.
- if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
- Scratch, dl, Regs)) {
- Merges.push_back(prior(Loc));
- for (unsigned j = SIndex; j < i; ++j) {
- MBB.erase(MemOps[j].MBBI);
- MemOps[j].Merged = true;
- }
- }
+ MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
+ Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
MemOps, Merges);
return;
}
- if (MemOps[i].Position > Pos) {
- Pos = MemOps[i].Position;
- Loc = MemOps[i].MBBI;
- }
+ if (MemOps[i].Position > MemOps[insertAfter].Position)
+ insertAfter = i;
}
bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
- if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
- Scratch, dl, Regs)) {
- Merges.push_back(prior(Loc));
- for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
- MBB.erase(MemOps[i].MBBI);
- MemOps[i].Merged = true;
- }
- }
-
+ MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
+ Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
return;
}
case ARM::t2LDRi12:
case ARM::t2STRi8:
case ARM::t2STRi12:
- case ARM::FLDS:
- case ARM::FSTS:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
return 4;
- case ARM::FLDD:
- case ARM::FSTD:
+ case ARM::VLDRD:
+ case ARM::VSTRD:
return 8;
case ARM::LDM:
case ARM::STM:
case ARM::t2LDM:
case ARM::t2STM:
return (MI->getNumOperands() - 5) * 4;
- case ARM::FLDMS:
- case ARM::FSTMS:
- case ARM::FLDMD:
- case ARM::FSTMD:
+ case ARM::VLDMS:
+ case ARM::VSTMS:
+ case ARM::VLDMD:
+ case ARM::VSTMD:
return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
}
}
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
-/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
+/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
}
if (MBBI != MBB.end()) {
- MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
}
}
} else {
- // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
+ // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
return false;
}
if (MBBI != MBB.end()) {
- MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
if (Mode == ARM_AM::ia &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
switch (Opc) {
case ARM::LDR: return ARM::LDR_PRE;
case ARM::STR: return ARM::STR_PRE;
- case ARM::FLDS: return ARM::FLDMS;
- case ARM::FLDD: return ARM::FLDMD;
- case ARM::FSTS: return ARM::FSTMS;
- case ARM::FSTD: return ARM::FSTMD;
+ case ARM::VLDRS: return ARM::VLDMS;
+ case ARM::VLDRD: return ARM::VLDMD;
+ case ARM::VSTRS: return ARM::VSTMS;
+ case ARM::VSTRD: return ARM::VSTMD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_PRE;
switch (Opc) {
case ARM::LDR: return ARM::LDR_POST;
case ARM::STR: return ARM::STR_POST;
- case ARM::FLDS: return ARM::FLDMS;
- case ARM::FLDD: return ARM::FLDMD;
- case ARM::FSTS: return ARM::FSTMS;
- case ARM::FSTD: return ARM::FSTMD;
+ case ARM::VLDRS: return ARM::VLDMS;
+ case ARM::VLDRD: return ARM::VLDMD;
+ case ARM::VSTRS: return ARM::VSTMS;
+ case ARM::VSTRD: return ARM::VSTMD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_POST;
unsigned Bytes = getLSMultipleTransferSize(MI);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
- bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
- Opcode == ARM::FSTD || Opcode == ARM::FSTS;
+ bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
return false;
if (MI->getOperand(2).getImm() != 0)
return false;
- bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (isLd && MI->getOperand(0).getReg() == Base)
}
if (!DoMerge && MBBI != MBB.end()) {
- MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
if (!isAM5 &&
isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
if (!DoMerge)
return false;
- bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
+ bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
unsigned Offset = 0;
if (isAM5)
Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
if (isLd) {
if (isAM5)
- // FLDMS, FLDMD
+ // VLDMS, VLDMD
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getKillRegState(BaseKill))
.addImm(Offset).addImm(Pred).addReg(PredReg)
} else {
MachineOperand &MO = MI->getOperand(0);
if (isAM5)
- // FSTMS, FSTMD
+ // VSTMS, VSTMD
BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
.addImm(Pred).addReg(PredReg)
.addReg(Base, getDefRegState(true)) // WB base register
/// isMemoryOp - Returns true if instruction is a memory operations (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
+ if (MI->hasOneMemOperand()) {
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO->isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO->getAlignment() < 4)
+ return false;
+ }
+
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
+ MI->getOperand(1).isUndef())
+ return false;
+
int Opcode = MI->getOpcode();
switch (Opcode) {
default: break;
case ARM::LDR:
case ARM::STR:
return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
- case ARM::FLDS:
- case ARM::FSTS:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
return MI->getOperand(1).isReg();
- case ARM::FLDD:
- case ARM::FSTD:
+ case ARM::VLDRD:
+ case ARM::VSTRD:
return MI->getOperand(1).isReg();
case ARM::t2LDRi8:
case ARM::t2LDRi12:
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
} else {
+ if (OddReg == EvenReg && EvenDeadKill) {
+ // If the two source operands are the same, the kill marker is probably
+ // on the first one. e.g.
+ // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+ EvenDeadKill = false;
+ OddDeadKill = true;
+ }
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
if (!STI->hasV5TEOps())
return false;
- // FIXME: FLDS / FSTS -> FLDD / FSTD
+ // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
unsigned Scale = 1;
unsigned Opcode = Op0->getOpcode();
if (Opcode == ARM::LDR)
continue;
int Opc = MI->getOpcode();
- bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD;
+ bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
unsigned Base = MI->getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);