#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
const MachineLoopInfo &mli,
const MachineDominatorTree &mdt,
bool IsPostRAFlag,
+ bool RemoveKillFlags,
LiveIntervals *lis)
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
- IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
+ IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
+ CanHandleTerminators(false), FirstDbgValue(0) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
RegionBegin = begin;
RegionEnd = end;
NumRegionInstrs = regioninstrs;
- MISUnitMap.clear();
-
- ScheduleDAG::clearDAG();
}
/// Close the current scheduling region. Don't clear any state in case the
/// this SUnit to following instructions in the same scheduling region that
/// depend the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
- const MachineInstr *MI = SU->getInstr();
- const MachineOperand &MO = MI->getOperand(OperIdx);
+ MachineInstr *MI = SU->getInstr();
+ MachineOperand &MO = MI->getOperand(OperIdx);
// Optionally add output and anti dependencies. For anti
// dependencies we use a latency of 0 because for a multi-issue
// retrieve the existing SUnits list for this register's uses.
// Push this SUnit on the use list.
Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
+ if (RemoveKillFlags)
+ MO.setIsKill(false);
}
else {
addPhysRegDataDeps(SU, OperIdx);
unsigned Reg = MI->getOperand(OperIdx).getReg();
// Record this local VReg use.
- VRegUses.insert(VReg2SUnit(Reg, SU));
+ VReg2UseMap::iterator UI = VRegUses.find(Reg);
+ for (; UI != VRegUses.end(); ++UI) {
+ if (UI->SU == SU)
+ break;
+ }
+ if (UI == VRegUses.end())
+ VRegUses.insert(VReg2SUnit(Reg, SU));
// Lookup this operand's reaching definition.
assert(LIS && "vreg dependencies requires LiveIntervals");
- LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
+ LiveQueryResult LRQ
+ = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI));
VNInfo *VNI = LRQ.valueIn();
// VNI will be valid because MachineOperand::readsReg() is checked by caller.
if (MIa == MIb)
return false;
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+ return true;
+
if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
return true;
MachineMemOperand *MMOa = *MIa->memoperands_begin();
MachineMemOperand *MMOb = *MIb->memoperands_begin();
- // FIXME: Need to handle multiple memory operands to support all targets.
- if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
- llvm_unreachable("Multiple memory operands.");
-
// The following interface to AA is fashioned after DAGCombiner::isAlias
// and operates with MachineMemOperand offset with some important
// assumptions:
bool isNormalMemory = false) {
// If this is a false dependency,
// do not add the edge, but rememeber the rejected node.
- if (!EnableAASchedMI ||
- MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
Dep.setLatency(TrueMemOrderLatency);
SUb->addPred(Dep);
// Assign the Latency field of SU using target-provided information.
SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
+
+ // If this SUnit uses an unbuffered resource, mark it as such.
+ // These resources are used for in-order execution pipelines within an
+ // out-of-order core and are identified by BufferSize=1. BufferSize=0 is
+ // used for dispatch/issue groups and is not considered here.
+ if (SchedModel.hasInstrSchedModel()) {
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+ switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
+ case 0:
+ SU->hasReservedResource = true;
+ break;
+ case 1:
+ SU->isUnbuffered = true;
+ break;
+ default:
+ break;
+ }
+ }
+ }
}
}
-/// If RegPressure is non null, compute register pressure as a side effect. The
+/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
- RegPressureTracker *RPTracker) {
+ RegPressureTracker *RPTracker,
+ PressureDiffs *PDiffs) {
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
+ : ST.useAA();
+ AliasAnalysis *AAForDep = UseAA ? AA : 0;
+
+ MISUnitMap.clear();
+ ScheduleDAG::clearDAG();
+
// Create an SUnit for each real instruction.
initSUnits();
+ if (PDiffs)
+ PDiffs->init(SUnits.size());
+
// We build scheduling units by walking a block's instruction list from bottom
// to top.
DbgMI = MI;
continue;
}
+ SUnit *SU = MISUnitMap[MI];
+ assert(SU && "No SUnit mapped to this MI");
+
if (RPTracker) {
- RPTracker->recede();
+ PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0;
+ RPTracker->recede(/*LiveUses=*/0, PDiff);
assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
}
assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) &&
"Cannot schedule terminators or labels!");
- SUnit *SU = MISUnitMap[MI];
- assert(SU && "No SUnit mapped to this MI");
-
// Add register-based dependencies (data, anti, and output).
bool HasVRegDef = false;
for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
unsigned ChainLatency = 0;
if (AliasChain->getInstr()->mayLoad())
ChainLatency = TrueMemOrderLatency;
- addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
+ addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes,
ChainLatency);
}
AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
E = AliasMemDefs.end(); I != E; ++I)
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes);
for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
TrueMemOrderLatency);
}
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
MapVector<const Value *, SUnit *>::iterator IE =
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+ addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes,
+ 0, true);
I->second = SU;
} else {
if (ThisMayAlias)
((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes,
TrueMemOrderLatency, true);
J->second.clear();
}
// Add dependencies from all the PendingLoads, i.e. loads
// with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
// Add dependence on alias chain, if needed.
if (AliasChain)
- addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
// But we also should check dependent instructions for the
// SU in question.
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
// potentially aliasing stores.
for (MapVector<const Value *, SUnit *>::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes);
PendingLoads.push_back(SU);
MayAlias = true;
MapVector<const Value *, SUnit *>::iterator IE =
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
- addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+ addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes,
+ 0, true);
if (ThisMayAlias)
AliasMemUses[V].push_back(SU);
else
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
- addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
}
PendingLoads.clear();
}
-/// Compute the max cyclic critical path through the DAG. For loops that span
-/// basic blocks, MachineTraceMetrics should be used for this instead.
-unsigned ScheduleDAGInstrs::computeCyclicCriticalPath() {
- // This only applies to single block loop.
- if (!BB->isSuccessor(BB))
- return 0;
-
- unsigned MaxCyclicLatency = 0;
- // Visit each live out vreg def to find def/use pairs that cross iterations.
- for (SUnit::const_pred_iterator
- PI = ExitSU.Preds.begin(), PE = ExitSU.Preds.end(); PI != PE; ++PI) {
- MachineInstr *MI = PI->getSUnit()->getInstr();
+/// \brief Initialize register live-range state for updating kills.
+void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
+ // Start with no live registers.
+ LiveRegs.reset();
+
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ // Repeat, for reg and all subregs.
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
+ }
+ }
+}
+
+bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
+ // Setting kill flag...
+ if (!MO.isKill()) {
+ MO.setIsKill(true);
+ return false;
+ }
+
+ // If MO itself is live, clear the kill flag...
+ if (LiveRegs.test(MO.getReg())) {
+ MO.setIsKill(false);
+ return false;
+ }
+
+ // If any subreg of MO is live, then create an imp-def for that
+ // subreg and keep MO marked as killed.
+ MO.setIsKill(false);
+ bool AllDead = true;
+ const unsigned SuperReg = MO.getReg();
+ MachineInstrBuilder MIB(MF, MI);
+ for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
+ MIB.addReg(*SubRegs, RegState::ImplicitDefine);
+ AllDead = false;
+ }
+ }
+
+ if(AllDead)
+ MO.setIsKill(true);
+ return false;
+}
+
+// FIXME: Reuse the LivePhysRegs utility for this.
+void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+
+ LiveRegs.resize(TRI->getNumRegs());
+ BitVector killedRegs(TRI->getNumRegs());
+
+ startBlockForKills(MBB);
+
+ // Examine block from end to start...
+ unsigned Count = MBB->size();
+ for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+ I != E; --Count) {
+ MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
+ // Update liveness. Registers that are defed but not used in this
+ // instruction are now dead. Mark register and all subregs as they
+ // are completely defined.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- break;
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ LiveRegs.clearBitsNotInMask(MO.getRegMask());
+ if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TRI->isPhysicalRegister(Reg))
- continue;
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ // Repeat for reg and all subregs.
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ LiveRegs.reset(*SubRegs);
+ }
- const LiveInterval &LI = LIS->getInterval(Reg);
- unsigned LiveOutHeight = PI->getSUnit()->getHeight();
- unsigned LiveOutDepth = PI->getSUnit()->getDepth() + PI->getLatency();
- // Visit all local users of the vreg def.
- for (VReg2UseMap::iterator
- UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
- if (UI->SU == &ExitSU)
- continue;
+ // Examine all used registers and set/clear kill flag. When a
+ // register is used multiple times we only set the kill flag on
+ // the first use. Don't set kill flags on undef operands.
+ killedRegs.reset();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
+
+ bool kill = false;
+ if (!killedRegs.test(Reg)) {
+ kill = true;
+ // A register is not killed if any subregs are live...
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
+ kill = false;
+ break;
+ }
+ }
- // Only consider uses of the phi.
- LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(UI->SU->getInstr()));
- if (!LRQ.valueIn()->isPHIDef())
- continue;
+ // If subreg is not live, then register is killed if it became
+ // live in this instruction
+ if (kill)
+ kill = !LiveRegs.test(Reg);
+ }
- // Cheat a bit and assume that a path spanning two iterations is a
- // cycle, which could overestimate in strange cases. This allows cyclic
- // latency to be estimated as the minimum height or depth slack.
- unsigned CyclicLatency = 0;
- if (LiveOutDepth > UI->SU->getDepth())
- CyclicLatency = LiveOutDepth - UI->SU->getDepth();
- unsigned LiveInHeight = UI->SU->getHeight() + PI->getLatency();
- if (LiveInHeight > LiveOutHeight) {
- if (LiveInHeight - LiveOutHeight < CyclicLatency)
- CyclicLatency = LiveInHeight - LiveOutHeight;
- }
- else
- CyclicLatency = 0;
- DEBUG(dbgs() << "Cyclic Path: SU(" << PI->getSUnit()->NodeNum
- << ") -> SU(" << UI->SU->NodeNum << ") = "
- << CyclicLatency << "\n");
- if (CyclicLatency > MaxCyclicLatency)
- MaxCyclicLatency = CyclicLatency;
+ if (MO.isKill() != kill) {
+ DEBUG(dbgs() << "Fixing " << MO << " in ");
+ // Warning: toggleKillFlag may invalidate MO.
+ toggleKillFlag(MI, MO);
+ DEBUG(MI->dump());
}
+
+ killedRegs.set(Reg);
+ }
+
+ // Mark any used register (that is not using undef) and subregs as
+ // now live...
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
+
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
}
}
- DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "\n");
- return MaxCyclicLatency;
}
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {