//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "early-ifcvt"
+#define DEBUG_TYPE "machine-trace-metrics"
#include "MachineTraceMetrics.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/PostOrderIterator.h"
MF = &Func;
TII = MF->getTarget().getInstrInfo();
TRI = MF->getTarget().getRegisterInfo();
- ItinData = MF->getTarget().getInstrItineraryData();
MRI = &MF->getRegInfo();
Loops = &getAnalysis<MachineLoopInfo>();
+ const TargetSubtargetInfo &ST =
+ MF->getTarget().getSubtarget<TargetSubtargetInfo>();
+ SchedModel.init(*ST.getSchedModel(), &ST, TII);
BlockInfo.resize(MF->getNumBlockIDs());
return false;
}
const MachineBasicBlock *Pred = *I;
const MachineTraceMetrics::TraceBlockInfo *PredTBI =
getDepthResources(Pred);
- assert(PredTBI && "Predecessor must be visited first");
+ // Ignore cycles that aren't natural loops.
+ if (!PredTBI)
+ continue;
// Pick the predecessor that would give this block the smallest InstrDepth.
unsigned Depth = PredTBI->InstrDepth + CurCount;
if (!Best || Depth < BestDepth)
continue;
const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
getHeightResources(Succ);
- assert(SuccTBI && "Successor must be visited first");
+ // Ignore cycles that aren't natural loops.
+ if (!SuccTBI)
+ continue;
// Pick the successor that would give this block the smallest InstrHeight.
unsigned Height = SuccTBI->InstrHeight;
if (!Best || Height < BestHeight)
namespace {
struct LoopBounds {
MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
+ SmallPtrSet<const MachineBasicBlock*, 8> Visited;
const MachineLoopInfo *Loops;
bool Downward;
LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
return false;
// From is null once when To is the trace center block.
- if (!From)
- return true;
- const MachineLoop *FromLoop = LB.Loops->getLoopFor(From);
- if (!FromLoop)
- return true;
- // Don't follow backedges, don't leave FromLoop when going upwards.
- if ((LB.Downward ? To : From) == FromLoop->getHeader())
- return false;
- // Don't leave FromLoop.
- if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
- return false;
- // This is a new block. The PO traversal will compute height/depth
- // resources, causing us to reject new edges to To. This only works because
- // we reject back-edges, so the CFG is cycle-free.
- return true;
+ if (From) {
+ if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
+ // Don't follow backedges, don't leave FromLoop when going upwards.
+ if ((LB.Downward ? To : From) == FromLoop->getHeader())
+ return false;
+ // Don't leave FromLoop.
+ if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
+ return false;
+ }
+ }
+ // To is a new block. Mark the block as visited in case the CFG has cycles
+ // that MachineLoopInfo didn't recognize as a natural loop.
+ return LB.Visited.insert(To);
}
};
}
// Run an upwards post-order search for the trace start.
Bounds.Downward = false;
+ Bounds.Visited.clear();
typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
I != E; ++I) {
// Run a downwards post-order search for the trace end.
Bounds.Downward = true;
+ Bounds.Visited.clear();
typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
I != E; ++I) {
}
}
+/// The length of the critical path through a trace is the maximum of two path
+/// lengths:
+///
+/// 1. The maximum height+depth over all instructions in the trace center block.
+///
+/// 2. The longest cross-block dependency chain. For small blocks, it is
+/// possible that the critical path through the trace doesn't include any
+/// instructions in the block.
+///
+/// This function computes the second number from the live-in list of the
+/// center block. TBI is that block's info; the result is 0 when no live-in qualifies.
+unsigned MachineTraceMetrics::Ensemble::
+computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
+ assert(TBI.HasValidInstrDepths && "Missing depth info");
+ assert(TBI.HasValidInstrHeights && "Missing height info");
+ unsigned MaxLen = 0;  // Largest Height + Depth sum seen so far.
+ for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+ const LiveInReg &LIR = TBI.LiveIns[i];
+ if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))  // Only virtual-register live-ins contribute.
+ continue;
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);  // NOTE(review): dereferenced below without a null check — confirm getVRegDef() cannot return null here.
+ // Ignore dependencies outside the current trace: the def's block must have a valid depth and the same Head as TBI.
+ const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
+ if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head)
+ continue;
+ unsigned Len = LIR.Height + Cycles[DefMI].Depth;  // The live-in's Height plus the Depth stored in Cycles for its def.
+ MaxLen = std::max(MaxLen, Len);
+ }
+ return MaxLen;
+}
+
/// Compute instruction depths for all instructions above or in MBB in its
/// trace. This assumes that the trace through MBB has already been computed.
void MachineTraceMetrics::Ensemble::
DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
TBI.HasValidInstrDepths = true;
+ TBI.CriticalPath = 0;
+
+ // Also compute the critical path length through MBB when possible.
+ if (TBI.HasValidInstrHeights)
+ TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
+
for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
const MachineInstr *UseMI = I;
unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
// Add latency if DefMI is a real instruction. Transients get latency 0.
if (!Dep.DefMI->isTransient())
- DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData,
- Dep.DefMI, Dep.DefOp,
- UseMI, Dep.UseOp,
- /* FindMin = */ false);
+ DepCycle += MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp,
+ /* FindMin = */ false);
Cycle = std::max(Cycle, DepCycle);
}
// Remember the instruction depth.
- Cycles[UseMI].Depth = Cycle;
- DEBUG(dbgs() << Cycle << '\t' << *UseMI);
+ InstrCycles &MICycles = Cycles[UseMI];
+ MICycles.Depth = Cycle;
+
+ if (!TBI.HasValidInstrHeights) {
+ DEBUG(dbgs() << Cycle << '\t' << *UseMI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI);
}
}
}
// Height is the issue height computed from virtual register dependencies alone.
static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
SparseSet<LiveRegUnit> &RegUnits,
- const InstrItineraryData *ItinData,
+ const TargetSchedModel &SchedModel,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
SmallVector<unsigned, 8> ReadOps;
unsigned DepHeight = I->Cycle;
if (!MI->isTransient()) {
// We may not know the UseMI of this dependency, if it came from the
- // live-in list.
- if (I->MI)
- DepHeight += TII->computeOperandLatency(ItinData,
- MI, MO.getOperandNo(),
- I->MI, I->Op);
- else
- // No UseMI. Just use the MI latency instead.
- DepHeight += TII->getInstrLatency(ItinData, MI);
+ // live-in list. SchedModel can handle a NULL UseMI.
+ DepHeight += SchedModel
+ .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op,
+ /* FindMin = */ false);
}
Height = std::max(Height, DepHeight);
// This regunit is dead above MI.
static bool pushDepHeight(const DataDep &Dep,
const MachineInstr *UseMI, unsigned UseHeight,
MIHeightMap &Heights,
- const InstrItineraryData *ItinData,
+ const TargetSchedModel &SchedModel,
const TargetInstrInfo *TII) {
// Adjust height by Dep.DefMI latency.
if (!Dep.DefMI->isTransient())
- UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp,
- UseMI, Dep.UseOp);
+ UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
+ UseMI, Dep.UseOp, false);
// Update Heights[DefMI] to be the maximum height seen.
MIHeightMap::iterator I;
DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
TBI.HasValidInstrHeights = true;
+ TBI.CriticalPath = 0;
// Get dependencies from PHIs in the trace successor.
- if (TBI.Succ) {
- for (MachineBasicBlock::const_iterator
- I = TBI.Succ->begin(), E = TBI.Succ->end();
- I != E && !I->isPHI(); ++I) {
+ const MachineBasicBlock *Succ = TBI.Succ;
+ // If MBB is the last block in the trace, and it has a back-edge to the
+ // loop header, get loop-carried dependencies from PHIs in the header. For
+ // that purpose, pretend that all the loop header PHIs have height 0.
+ if (!Succ)
+ if (const MachineLoop *Loop = getLoopFor(MBB))
+ if (MBB->isSuccessor(Loop->getHeader()))
+ Succ = Loop->getHeader();
+
+ if (Succ) {
+ for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end();
+ I != E && I->isPHI(); ++I) {
const MachineInstr *PHI = I;
Deps.clear();
getPHIDeps(PHI, Deps, MBB, MTM.MRI);
- if (!Deps.empty())
- if (pushDepHeight(Deps.front(), PHI, Cycles.lookup(PHI).Height,
- Heights, MTM.ItinData, MTM.TII))
+ if (!Deps.empty()) {
+ // Loop header PHI heights are all 0.
+ unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
+ DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
+ if (pushDepHeight(Deps.front(), PHI, Height,
+ Heights, MTM.SchedModel, MTM.TII))
addLiveIns(Deps.front().DefMI, Stack);
+ }
}
}
// There may also be regunit dependencies to include in the height.
if (HasPhysRegs)
Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
- MTM.ItinData, MTM.TII, MTM.TRI);
-
- DEBUG(dbgs() << Cycle << '\t' << *MI);
- Cycles[MI].Height = Cycle;
+ MTM.SchedModel, MTM.TII, MTM.TRI);
// Update the required height of any virtual registers read by MI.
for (unsigned i = 0, e = Deps.size(); i != e; ++i)
- if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII))
+ if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
addLiveIns(Deps[i].DefMI, Stack);
+
+ InstrCycles &MICycles = Cycles[MI];
+ MICycles.Height = Cycle;
+ if (!TBI.HasValidInstrDepths) {
+ DEBUG(dbgs() << Cycle << '\t' << *MI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
}
// Update virtual live-in heights. They were added by addLiveIns() with a 0
<< '@' << RI->Cycle);
}
DEBUG(dbgs() << '\n');
+
+ if (!TBI.HasValidInstrDepths)
+ continue;
+ // Add live-ins to the critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath,
+ computeCrossBlockCriticalPath(TBI));
+ DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
}
}
return Trace(*this, BlockInfo[MBB->getNumber()]);
}
+unsigned  // Returns getCriticalPath() minus the sum of MI's Depth and Height.
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
+ assert(MI && "Not an instruction.");
+ assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
+ "MI must be in the trace center block");
+ InstrCycles Cyc = getInstrCycles(MI);  // Depth and Height cycles looked up for MI.
+ return getCriticalPath() - (Cyc.Depth + Cyc.Height);  // NOTE(review): unsigned result — presumably relies on getCriticalPath() >= Depth + Height; confirm it cannot wrap.
+}
+
+unsigned  // Returns the Depth of the instruction defining PHI's dependency for this trace's block, plus its operand latency to PHI.
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
+ const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());  // The block this trace object is numbered for.
+ SmallVector<DataDep, 1> Deps;
+ getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);  // Presumably collects the PHI operand associated with MBB — see the assert below.
+ assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
+ DataDep &Dep = Deps.front();  // NOTE(review): guarded only by the assert above; front() on an empty vector is invalid when asserts are compiled out.
+ unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += TE.MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false);  // Final argument is FindMin, per the labelled calls elsewhere in this patch.
+ return DepCycle;
+}
+
+unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {  // Bottom: also count this block's own InstrCount.
+ // For now, we compute the resource depth from instruction count / issue
+ // width. Eventually, we should compute resource depth per functional unit
+ // and return the max.
+ unsigned Instrs = TBI.InstrDepth;
+ if (Bottom)
+ Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())  // Divide only when the reported issue width is non-zero.
+ Instrs /= IW;  // Unsigned integer division: any remainder is dropped.
+ // Assume issue width 1 without a schedule model, i.e. return the undivided instruction count.
+ return Instrs;
+}
+
+unsigned MachineTraceMetrics::Trace::  // Returns the trace's instruction count plus that of Extrablocks, divided by the issue width.
+getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+ unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;  // Start from the trace block's InstrDepth and InstrHeight.
+ for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
+ Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;  // NOTE(review): getResources() result is dereferenced unchecked — confirm it is never null.
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())  // Divide only when the reported issue width is non-zero.
+ Instrs /= IW;  // Unsigned integer division: any remainder is dropped.
+ // Assume issue width 1 without a schedule model, i.e. return the undivided instruction count.
+ return Instrs;
+}
+
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
OS << getName() << " ensemble:\n";
for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
OS << " +instrs";
} else
OS << "height invalid";
+ if (HasValidInstrDepths && HasValidInstrHeights)
+ OS << ", crit=" << CriticalPath;
}
void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
<< " --> BB#" << TBI.Tail << ':';
if (TBI.hasValidHeight() && TBI.hasValidDepth())
OS << ' ' << getInstrCount() << " instrs.";
+ if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
+ OS << ' ' << TBI.CriticalPath << " cycles.";
const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
OS << "\nBB#" << MBBNum;