X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineTraceMetrics.cpp;h=8aacd1f31bb6f85717cb247d1ae3d334324e8752;hb=d6b76f9466f267ac5ec8ac0f3afa83da0d810490;hp=1c0894e15db5529465262b82d9b83583c4043221;hpb=79a20ce6f0d6c1041a5031aca41b50a1e58b1d4b;p=oota-llvm.git diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 1c0894e15db..8aacd1f31bb 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -7,22 +7,26 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "early-ifcvt" -#include "MachineTraceMetrics.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SparseSet.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "machine-trace-metrics" + char MachineTraceMetrics::ID = 0; char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; @@ -34,8 +38,9 @@ INITIALIZE_PASS_END(MachineTraceMetrics, "machine-trace-metrics", "Machine Trace Metrics", false, true) MachineTraceMetrics::MachineTraceMetrics() - : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) { - std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0); + : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr), + MRI(nullptr), Loops(nullptr) { + std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr); } void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { @@ -47,21 +52,24 @@ void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF = &Func; - TII = MF->getTarget().getInstrInfo(); - TRI = MF->getTarget().getRegisterInfo(); - ItinData = MF->getTarget().getInstrItineraryData(); + const TargetSubtargetInfo &ST = MF->getSubtarget(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); MRI = &MF->getRegInfo(); Loops = &getAnalysis(); + SchedModel.init(ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); + ProcResourceCycles.resize(MF->getNumBlockIDs() * + SchedModel.getNumProcResourceKinds()); return false; } void MachineTraceMetrics::releaseMemory() { - MF = 0; + MF = nullptr; BlockInfo.clear(); for (unsigned i = 0; i != TS_NumStrategies; ++i) { delete Ensembles[i]; - Ensembles[i] = 0; + Ensembles[i] = nullptr; } } @@ -81,22 +89,55 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { return FBI; // Compute resource usage in the block. - // FIXME: Compute per-functional unit counts. FBI->HasCalls = false; unsigned InstrCount = 0; - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - const MachineInstr *MI = I; - if (MI->isTransient()) + + // Add up per-processor resource cycles as well. + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + SmallVector PRCycles(PRKinds); + + for (const auto &MI : *MBB) { + if (MI.isTransient()) continue; ++InstrCount; - if (MI->isCall()) + if (MI.isCall()) FBI->HasCalls = true; + + // Count processor resources used. + if (!SchedModel.hasInstrSchedModel()) + continue; + const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI); + if (!SC->isValid()) + continue; + + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind"); + PRCycles[PI->ProcResourceIdx] += PI->Cycles; + } } FBI->InstrCount = InstrCount; + + // Scale the resource cycles so they are comparable. + unsigned PROffset = MBB->getNumber() * PRKinds; + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceCycles[PROffset + K] = + PRCycles[K] * SchedModel.getResourceFactor(K); + return FBI; } +ArrayRef +MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const { + assert(BlockInfo[MBBNum].hasResources() && + "getResources() must be called before getProcResourceCycles()"); + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size()); + return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds); +} + + //===----------------------------------------------------------------------===// // Ensemble utility functions //===----------------------------------------------------------------------===// @@ -104,6 +145,9 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct) : MTM(*ct) { BlockInfo.resize(MTM.BlockInfo.size()); + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds); + ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds); } // Virtual destructor serves as an anchor. @@ -119,21 +163,32 @@ MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const { void MachineTraceMetrics::Ensemble:: computeDepthResources(const MachineBasicBlock *MBB) { TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + unsigned PROffset = MBB->getNumber() * PRKinds; // Compute resources from trace above. The top block is simple. if (!TBI->Pred) { TBI->InstrDepth = 0; TBI->Head = MBB->getNumber(); + std::fill(ProcResourceDepths.begin() + PROffset, + ProcResourceDepths.begin() + PROffset + PRKinds, 0); return; } // Compute from the block above. A post-order traversal ensures the // predecessor is always computed first. - TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()]; + unsigned PredNum = TBI->Pred->getNumber(); + TraceBlockInfo *PredTBI = &BlockInfo[PredNum]; assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet"); const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred); TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount; TBI->Head = PredTBI->Head; + + // Compute per-resource depths. + ArrayRef PredPRDepths = getProcResourceDepths(PredNum); + ArrayRef PredPRCycles = MTM.getProcResourceCycles(PredNum); + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K]; } // Update resource-related information in the TraceBlockInfo for MBB. @@ -141,22 +196,33 @@ computeDepthResources(const MachineBasicBlock *MBB) { void MachineTraceMetrics::Ensemble:: computeHeightResources(const MachineBasicBlock *MBB) { TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + unsigned PROffset = MBB->getNumber() * PRKinds; // Compute resources for the current block. TBI->InstrHeight = MTM.getResources(MBB)->InstrCount; + ArrayRef PRCycles = MTM.getProcResourceCycles(MBB->getNumber()); // The trace tail is done. if (!TBI->Succ) { TBI->Tail = MBB->getNumber(); + std::copy(PRCycles.begin(), PRCycles.end(), + ProcResourceHeights.begin() + PROffset); return; } // Compute from the block below. A post-order traversal ensures the // predecessor is always computed first. - TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()]; + unsigned SuccNum = TBI->Succ->getNumber(); + TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum]; assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet"); TBI->InstrHeight += SuccTBI->InstrHeight; TBI->Tail = SuccTBI->Tail; + + // Compute per-resource heights. + ArrayRef SuccPRHeights = getProcResourceHeights(SuccNum); + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K]; } // Check if depth resources for MBB are valid and return the TBI. @@ -165,7 +231,7 @@ const MachineTraceMetrics::TraceBlockInfo* MachineTraceMetrics::Ensemble:: getDepthResources(const MachineBasicBlock *MBB) const { const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; - return TBI->hasValidDepth() ? TBI : 0; + return TBI->hasValidDepth() ? TBI : nullptr; } // Check if height resources for MBB are valid and return the TBI. @@ -174,7 +240,34 @@ const MachineTraceMetrics::TraceBlockInfo* MachineTraceMetrics::Ensemble:: getHeightResources(const MachineBasicBlock *MBB) const { const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; - return TBI->hasValidHeight() ? TBI : 0; + return TBI->hasValidHeight() ? TBI : nullptr; +} + +/// Get an array of processor resource depths for MBB. Indexed by processor +/// resource kind, this array contains the scaled processor resources consumed +/// by all blocks preceding MBB in its trace. It does not include instructions +/// in MBB. +/// +/// Compare TraceBlockInfo::InstrDepth. +ArrayRef +MachineTraceMetrics::Ensemble:: +getProcResourceDepths(unsigned MBBNum) const { + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size()); + return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds); +} + +/// Get an array of processor resource heights for MBB. Indexed by processor +/// resource kind, this array contains the scaled processor resources consumed +/// by this block and all blocks following it in its trace. +/// +/// Compare TraceBlockInfo::InstrHeight. +ArrayRef +MachineTraceMetrics::Ensemble:: +getProcResourceHeights(unsigned MBBNum) const { + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size()); + return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds); } //===----------------------------------------------------------------------===// @@ -205,9 +298,9 @@ static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) { // instructions. namespace { class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { - const char *getName() const { return "MinInstr"; } - const MachineBasicBlock *pickTracePred(const MachineBasicBlock*); - const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*); + const char *getName() const override { return "MinInstr"; } + const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override; + const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override; public: MinInstrCountEnsemble(MachineTraceMetrics *mtm) @@ -219,20 +312,22 @@ public: const MachineBasicBlock* MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { if (MBB->pred_empty()) - return 0; + return nullptr; const MachineLoop *CurLoop = getLoopFor(MBB); // Don't leave loops, and never follow back-edges. if (CurLoop && MBB == CurLoop->getHeader()) - return 0; + return nullptr; unsigned CurCount = MTM.getResources(MBB)->InstrCount; - const MachineBasicBlock *Best = 0; + const MachineBasicBlock *Best = nullptr; unsigned BestDepth = 0; for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { const MachineBasicBlock *Pred = *I; const MachineTraceMetrics::TraceBlockInfo *PredTBI = getDepthResources(Pred); - assert(PredTBI && "Predecessor must be visited first"); + // Ignore cycles that aren't natural loops. + if (!PredTBI) + continue; // Pick the predecessor that would give this block the smallest InstrDepth. unsigned Depth = PredTBI->InstrDepth + CurCount; if (!Best || Depth < BestDepth) @@ -245,9 +340,9 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { const MachineBasicBlock* MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { if (MBB->pred_empty()) - return 0; + return nullptr; const MachineLoop *CurLoop = getLoopFor(MBB); - const MachineBasicBlock *Best = 0; + const MachineBasicBlock *Best = nullptr; unsigned BestHeight = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { @@ -260,7 +355,9 @@ MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { continue; const MachineTraceMetrics::TraceBlockInfo *SuccTBI = getHeightResources(Succ); - assert(SuccTBI && "Successor must be visited first"); + // Ignore cycles that aren't natural loops. + if (!SuccTBI) + continue; // Pick the successor that would give this block the smallest InstrHeight. unsigned Height = SuccTBI->InstrHeight; if (!Best || Height < BestHeight) @@ -314,6 +411,7 @@ void MachineTraceMetrics::verifyAnalysis() const { namespace { struct LoopBounds { MutableArrayRef Blocks; + SmallPtrSet Visited; const MachineLoopInfo *Loops; bool Downward; LoopBounds(MutableArrayRef blocks, @@ -338,21 +436,19 @@ public: if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) return false; // From is null once when To is the trace center block. - if (!From) - return true; - const MachineLoop *FromLoop = LB.Loops->getLoopFor(From); - if (!FromLoop) - return true; - // Don't follow backedges, don't leave FromLoop when going upwards. - if ((LB.Downward ? To : From) == FromLoop->getHeader()) - return false; - // Don't leave FromLoop. - if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) - return false; - // This is a new block. The PO traversal will compute height/depth - // resources, causing us to reject new edges to To. This only works because - // we reject back-edges, so the CFG is cycle-free. - return true; + if (From) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + // Don't follow backedges, don't leave FromLoop when going upwards. + if ((LB.Downward ? To : From) == FromLoop->getHeader()) + return false; + // Don't leave FromLoop. + if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) + return false; + } + } + // To is a new block. Mark the block as visited in case the CFG has cycles + // that MachineLoopInfo didn't recognize as a natural loop. + return LB.Visited.insert(To).second; } }; } @@ -366,6 +462,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run an upwards post-order search for the trace start. Bounds.Downward = false; + Bounds.Visited.clear(); typedef ipo_ext_iterator UpwardPO; for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); I != E; ++I) { @@ -385,6 +482,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run a downwards post-order search for the trace end. Bounds.Downward = true; + Bounds.Visited.clear(); typedef po_ext_iterator DownwardPO; for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); I != E; ++I) { @@ -466,9 +564,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { // invalidated, but their instructions will stay the same, so there is no // need to erase the Cycle entries. They will be overwritten when we // recompute. - for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end(); - I != E; ++I) - Cycles.erase(I); + for (const auto &I : *BadMBB) + Cycles.erase(&I); } void MachineTraceMetrics::Ensemble::verify() const { @@ -525,7 +622,7 @@ struct DataDep { assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); assert(!DefI.atEnd() && "Register has no defs"); - DefMI = &*DefI; + DefMI = DefI->getParent(); DefOp = DefI.getOperandNo(); assert((++DefI).atEnd() && "Register has multiple defs"); } @@ -588,7 +685,7 @@ struct LiveRegUnit { unsigned getSparseSetIndex() const { return RegUnit; } - LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {} + LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {} }; } @@ -668,7 +765,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); // Ignore dependencies outside the current trace. const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; - if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head) + if (!DefTBI.isUsefulDominator(TBI)) continue; unsigned Len = LIR.Height + Cycles[DefMI].Depth; MaxLen = std::max(MaxLen, Len); @@ -704,25 +801,35 @@ computeInstrDepths(const MachineBasicBlock *MBB) { SmallVector Deps; while (!Stack.empty()) { MBB = Stack.pop_back_val(); - DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n"); + DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n"); TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; TBI.HasValidInstrDepths = true; TBI.CriticalPath = 0; + // Print out resource depths here as well. + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrDepth); + ArrayRef PRDepths = getProcResourceDepths(MBB->getNumber()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + if (PRDepths[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRDepths[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + // Also compute the critical path length through MBB when possible. if (TBI.HasValidInstrHeights) TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - const MachineInstr *UseMI = I; - + for (const auto &UseMI : *MBB) { // Collect all data dependencies. Deps.clear(); - if (UseMI->isPHI()) - getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); - else if (getDataDeps(UseMI, Deps, MTM.MRI)) - updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI); + if (UseMI.isPHI()) + getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(&UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI); // Filter and process dependencies, computing the earliest issue cycle. unsigned Cycle = 0; @@ -731,29 +838,27 @@ computeInstrDepths(const MachineBasicBlock *MBB) { const TraceBlockInfo&DepTBI = BlockInfo[Dep.DefMI->getParent()->getNumber()]; // Ignore dependencies from outside the current trace. - if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head) + if (!DepTBI.isUsefulDominator(TBI)) continue; assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) - DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData, - Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp, - /* FindMin = */ false); + DepCycle += MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp); Cycle = std::max(Cycle, DepCycle); } // Remember the instruction depth. - InstrCycles &MICycles = Cycles[UseMI]; + InstrCycles &MICycles = Cycles[&UseMI]; MICycles.Depth = Cycle; if (!TBI.HasValidInstrHeights) { - DEBUG(dbgs() << Cycle << '\t' << *UseMI); + DEBUG(dbgs() << Cycle << '\t' << UseMI); continue; } // Update critical path length. TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); - DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI); } } } @@ -763,7 +868,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Height is the issue height computed from virtual register dependencies alone. static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, SparseSet &RegUnits, - const InstrItineraryData *ItinData, + const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { SmallVector ReadOps; @@ -786,14 +891,9 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, unsigned DepHeight = I->Cycle; if (!MI->isTransient()) { // We may not know the UseMI of this dependency, if it came from the - // live-in list. - if (I->MI) - DepHeight += TII->computeOperandLatency(ItinData, - MI, MO.getOperandNo(), - I->MI, I->Op); - else - // No UseMI. Just use the MI latency instead. - DepHeight += TII->getInstrLatency(ItinData, MI); + // live-in list. SchedModel can handle a NULL UseMI. + DepHeight += SchedModel + .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op); } Height = std::max(Height, DepHeight); // This regunit is dead above MI. @@ -826,17 +926,17 @@ typedef DenseMap MIHeightMap; static bool pushDepHeight(const DataDep &Dep, const MachineInstr *UseMI, unsigned UseHeight, MIHeightMap &Heights, - const InstrItineraryData *ItinData, + const TargetSchedModel &SchedModel, const TargetInstrInfo *TII) { // Adjust height by Dep.DefMI latency. if (!Dep.DefMI->isTransient()) - UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp); + UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp); // Update Heights[DefMI] to be the maximum height seen. MIHeightMap::iterator I; bool New; - tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); + std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); if (New) return true; @@ -846,14 +946,14 @@ static bool pushDepHeight(const DataDep &Dep, return false; } -/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists -/// of all the blocks in Trace. Stop when reaching the block that contains -/// DefMI. +/// Assuming that the virtual register defined by DefMI:DefOp was used by +/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop +/// when reaching the block that contains DefMI. void MachineTraceMetrics::Ensemble:: -addLiveIns(const MachineInstr *DefMI, +addLiveIns(const MachineInstr *DefMI, unsigned DefOp, ArrayRef Trace) { assert(!Trace.empty() && "Trace should contain at least one block"); - unsigned Reg = DefMI->getOperand(0).getReg(); + unsigned Reg = DefMI->getOperand(DefOp).getReg(); assert(TargetRegisterInfo::isVirtualRegister(Reg)); const MachineBasicBlock *DefMBB = DefMI->getParent(); @@ -924,18 +1024,42 @@ computeInstrHeights(const MachineBasicBlock *MBB) { TBI.HasValidInstrHeights = true; TBI.CriticalPath = 0; + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrHeight); + ArrayRef PRHeights = getProcResourceHeights(MBB->getNumber()); + for (unsigned K = 0; K != PRHeights.size(); ++K) + if (PRHeights[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRHeights[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + // Get dependencies from PHIs in the trace successor. - if (TBI.Succ) { - for (MachineBasicBlock::const_iterator - I = TBI.Succ->begin(), E = TBI.Succ->end(); - I != E && !I->isPHI(); ++I) { - const MachineInstr *PHI = I; + const MachineBasicBlock *Succ = TBI.Succ; + // If MBB is the last block in the trace, and it has a back-edge to the + // loop header, get loop-carried dependencies from PHIs in the header. For + // that purpose, pretend that all the loop header PHIs have height 0. + if (!Succ) + if (const MachineLoop *Loop = getLoopFor(MBB)) + if (MBB->isSuccessor(Loop->getHeader())) + Succ = Loop->getHeader(); + + if (Succ) { + for (const auto &PHI : *Succ) { + if (!PHI.isPHI()) + break; Deps.clear(); - getPHIDeps(PHI, Deps, MBB, MTM.MRI); - if (!Deps.empty()) - if (pushDepHeight(Deps.front(), PHI, Cycles.lookup(PHI).Height, - Heights, MTM.ItinData, MTM.TII)) - addLiveIns(Deps.front().DefMI, Stack); + getPHIDeps(&PHI, Deps, MBB, MTM.MRI); + if (!Deps.empty()) { + // Loop header PHI heights are all 0. + unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0; + DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI); + if (pushDepHeight(Deps.front(), &PHI, Height, + Heights, MTM.SchedModel, MTM.TII)) + addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack); + } } } @@ -962,12 +1086,12 @@ computeInstrHeights(const MachineBasicBlock *MBB) { // There may also be regunit dependencies to include in the height. if (HasPhysRegs) Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, - MTM.ItinData, MTM.TII, MTM.TRI); + MTM.SchedModel, MTM.TII, MTM.TRI); // Update the required height of any virtual registers read by MI. for (unsigned i = 0, e = Deps.size(); i != e; ++i) - if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII)) - addLiveIns(Deps[i].DefMI, Stack); + if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII)) + addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack); InstrCycles &MICycles = Cycles[MI]; MICycles.Height = Cycle; @@ -1017,6 +1141,124 @@ MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { return Trace(*this, BlockInfo[MBB->getNumber()]); } +unsigned +MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const { + assert(MI && "Not an instruction."); + assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) && + "MI must be in the trace center block"); + InstrCycles Cyc = getInstrCycles(MI); + return getCriticalPath() - (Cyc.Depth + Cyc.Height); +} + +unsigned +MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { + const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum()); + SmallVector Deps; + getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI); + assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor"); + DataDep &Dep = Deps.front(); + unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += TE.MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp); + return DepCycle; +} + +/// When bottom is set include instructions in current block in estimate. +unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { + // Find the limiting processor resource. + // Numbers have been pre-scaled to be comparable. + unsigned PRMax = 0; + ArrayRef PRDepths = TE.getProcResourceDepths(getBlockNum()); + if (Bottom) { + ArrayRef PRCycles = TE.MTM.getProcResourceCycles(getBlockNum()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]); + } else { + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K]); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + + /// All instructions before current block + unsigned Instrs = TBI.InstrDepth; + // plus instructions in current block + if (Bottom) + Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; + // Assume issue width 1 without a schedule model. + return std::max(Instrs, PRMax); +} + +unsigned MachineTraceMetrics::Trace::getResourceLength( + ArrayRef Extrablocks, + ArrayRef ExtraInstrs, + ArrayRef RemoveInstrs) const { + // Add up resources above and below the center block. + ArrayRef PRDepths = TE.getProcResourceDepths(getBlockNum()); + ArrayRef PRHeights = TE.getProcResourceHeights(getBlockNum()); + unsigned PRMax = 0; + + // Capture computing cycles from extra instructions + auto extraCycles = [this](ArrayRef Instrs, + unsigned ResourceIdx) + ->unsigned { + unsigned Cycles = 0; + for (unsigned I = 0; I != Instrs.size(); ++I) { + const MCSchedClassDesc *SC = Instrs[I]; + if (!SC->isValid()) + continue; + for (TargetSchedModel::ProcResIter + PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), + PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); + PI != PE; ++PI) { + if (PI->ProcResourceIdx != ResourceIdx) + continue; + Cycles += + (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx)); + } + } + return Cycles; + }; + + for (unsigned K = 0; K != PRDepths.size(); ++K) { + unsigned PRCycles = PRDepths[K] + PRHeights[K]; + for (unsigned I = 0; I != Extrablocks.size(); ++I) + PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + PRCycles += extraCycles(ExtraInstrs, K); + PRCycles -= extraCycles(RemoveInstrs, K); + PRMax = std::max(PRMax, PRCycles); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + + // Instrs: #instructions in current trace outside current block. + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + // Add instruction count from the extra blocks. + for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) + Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + Instrs += ExtraInstrs.size(); + Instrs -= RemoveInstrs.size(); + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; + // Assume issue width 1 without a schedule model. + return std::max(Instrs, PRMax); +} + +bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI, + const MachineInstr *UseMI) const { + if (DefMI->getParent() == UseMI->getParent()) + return true; + + const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()]; + const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()]; + + return DepTBI.isUsefulDominator(TBI); +} + void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { OS << getName() << " ensemble:\n"; for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {