X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineTraceMetrics.cpp;h=9b91af541db708000832a19ec352a0d91719e622;hb=ebd7eabca4c090175b71f221e880f8bd937a4523;hp=1c0894e15db5529465262b82d9b83583c4043221;hpb=79a20ce6f0d6c1041a5031aca41b50a1e58b1d4b;p=oota-llvm.git diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 1c0894e15db..9b91af541db 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "early-ifcvt" +#define DEBUG_TYPE "machine-trace-metrics" #include "MachineTraceMetrics.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/PostOrderIterator.h" @@ -49,9 +51,11 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF = &Func; TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); - ItinData = MF->getTarget().getInstrItineraryData(); MRI = &MF->getRegInfo(); Loops = &getAnalysis(); + const TargetSubtargetInfo &ST = + MF->getTarget().getSubtarget(); + SchedModel.init(*ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); return false; } @@ -232,7 +236,9 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { const MachineBasicBlock *Pred = *I; const MachineTraceMetrics::TraceBlockInfo *PredTBI = getDepthResources(Pred); - assert(PredTBI && "Predecessor must be visited first"); + // Ignore cycles that aren't natural loops. + if (!PredTBI) + continue; // Pick the predecessor that would give this block the smallest InstrDepth. unsigned Depth = PredTBI->InstrDepth + CurCount; if (!Best || Depth < BestDepth) @@ -260,7 +266,9 @@ MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { continue; const MachineTraceMetrics::TraceBlockInfo *SuccTBI = getHeightResources(Succ); - assert(SuccTBI && "Successor must be visited first"); + // Ignore cycles that aren't natural loops. + if (!SuccTBI) + continue; // Pick the successor that would give this block the smallest InstrHeight. unsigned Height = SuccTBI->InstrHeight; if (!Best || Height < BestHeight) @@ -314,6 +322,7 @@ void MachineTraceMetrics::verifyAnalysis() const { namespace { struct LoopBounds { MutableArrayRef Blocks; + SmallPtrSet Visited; const MachineLoopInfo *Loops; bool Downward; LoopBounds(MutableArrayRef blocks, @@ -338,21 +347,19 @@ public: if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) return false; // From is null once when To is the trace center block. - if (!From) - return true; - const MachineLoop *FromLoop = LB.Loops->getLoopFor(From); - if (!FromLoop) - return true; - // Don't follow backedges, don't leave FromLoop when going upwards. - if ((LB.Downward ? To : From) == FromLoop->getHeader()) - return false; - // Don't leave FromLoop. - if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) - return false; - // This is a new block. The PO traversal will compute height/depth - // resources, causing us to reject new edges to To. This only works because - // we reject back-edges, so the CFG is cycle-free. - return true; + if (From) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + // Don't follow backedges, don't leave FromLoop when going upwards. + if ((LB.Downward ? To : From) == FromLoop->getHeader()) + return false; + // Don't leave FromLoop. + if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) + return false; + } + } + // To is a new block. Mark the block as visited in case the CFG has cycles + // that MachineLoopInfo didn't recognize as a natural loop. + return LB.Visited.insert(To); } }; } @@ -366,6 +373,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run an upwards post-order search for the trace start. Bounds.Downward = false; + Bounds.Visited.clear(); typedef ipo_ext_iterator UpwardPO; for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); I != E; ++I) { @@ -385,6 +393,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run a downwards post-order search for the trace end. Bounds.Downward = true; + Bounds.Visited.clear(); typedef po_ext_iterator DownwardPO; for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); I != E; ++I) { @@ -737,10 +746,9 @@ computeInstrDepths(const MachineBasicBlock *MBB) { unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) - DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData, - Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp, - /* FindMin = */ false); + DepCycle += MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp, + /* FindMin = */ false); Cycle = std::max(Cycle, DepCycle); } // Remember the instruction depth. @@ -763,7 +771,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Height is the issue height computed from virtual register dependencies alone. static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, SparseSet &RegUnits, - const InstrItineraryData *ItinData, + const TargetSchedModel &SchedModel, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { SmallVector ReadOps; @@ -786,14 +794,10 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, unsigned DepHeight = I->Cycle; if (!MI->isTransient()) { // We may not know the UseMI of this dependency, if it came from the - // live-in list. - if (I->MI) - DepHeight += TII->computeOperandLatency(ItinData, - MI, MO.getOperandNo(), - I->MI, I->Op); - else - // No UseMI. Just use the MI latency instead. - DepHeight += TII->getInstrLatency(ItinData, MI); + // live-in list. SchedModel can handle a NULL UseMI. + DepHeight += SchedModel + .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op, + /* FindMin = */ false); } Height = std::max(Height, DepHeight); // This regunit is dead above MI. @@ -826,12 +830,12 @@ typedef DenseMap MIHeightMap; static bool pushDepHeight(const DataDep &Dep, const MachineInstr *UseMI, unsigned UseHeight, MIHeightMap &Heights, - const InstrItineraryData *ItinData, + const TargetSchedModel &SchedModel, const TargetInstrInfo *TII) { // Adjust height by Dep.DefMI latency. if (!Dep.DefMI->isTransient()) - UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp, - UseMI, Dep.UseOp); + UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp, false); // Update Heights[DefMI] to be the maximum height seen. MIHeightMap::iterator I; @@ -925,17 +929,29 @@ computeInstrHeights(const MachineBasicBlock *MBB) { TBI.CriticalPath = 0; // Get dependencies from PHIs in the trace successor. - if (TBI.Succ) { - for (MachineBasicBlock::const_iterator - I = TBI.Succ->begin(), E = TBI.Succ->end(); - I != E && !I->isPHI(); ++I) { + const MachineBasicBlock *Succ = TBI.Succ; + // If MBB is the last block in the trace, and it has a back-edge to the + // loop header, get loop-carried dependencies from PHIs in the header. For + // that purpose, pretend that all the loop header PHIs have height 0. + if (!Succ) + if (const MachineLoop *Loop = getLoopFor(MBB)) + if (MBB->isSuccessor(Loop->getHeader())) + Succ = Loop->getHeader(); + + if (Succ) { + for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end(); + I != E && I->isPHI(); ++I) { const MachineInstr *PHI = I; Deps.clear(); getPHIDeps(PHI, Deps, MBB, MTM.MRI); - if (!Deps.empty()) - if (pushDepHeight(Deps.front(), PHI, Cycles.lookup(PHI).Height, - Heights, MTM.ItinData, MTM.TII)) + if (!Deps.empty()) { + // Loop header PHI heights are all 0. + unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0; + DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); + if (pushDepHeight(Deps.front(), PHI, Height, + Heights, MTM.SchedModel, MTM.TII)) addLiveIns(Deps.front().DefMI, Stack); + } } } @@ -962,11 +978,11 @@ computeInstrHeights(const MachineBasicBlock *MBB) { // There may also be regunit dependencies to include in the height. if (HasPhysRegs) Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, - MTM.ItinData, MTM.TII, MTM.TRI); + MTM.SchedModel, MTM.TII, MTM.TRI); // Update the required height of any virtual registers read by MI. for (unsigned i = 0, e = Deps.size(); i != e; ++i) - if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII)) + if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII)) addLiveIns(Deps[i].DefMI, Stack); InstrCycles &MICycles = Cycles[MI]; @@ -1017,6 +1033,54 @@ MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { return Trace(*this, BlockInfo[MBB->getNumber()]); } +unsigned +MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const { + assert(MI && "Not an instruction."); + assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) && + "MI must be in the trace center block"); + InstrCycles Cyc = getInstrCycles(MI); + return getCriticalPath() - (Cyc.Depth + Cyc.Height); +} + +unsigned +MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { + const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum()); + SmallVector Deps; + getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI); + assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor"); + DataDep &Dep = Deps.front(); + unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += TE.MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false); + return DepCycle; +} + +unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { + // For now, we compute the resource depth from instruction count / issue + // width. Eventually, we should compute resource depth per functional unit + // and return the max. + unsigned Instrs = TBI.InstrDepth; + if (Bottom) + Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; + // Assume issue width 1 without a schedule model. + return Instrs; +} + +unsigned MachineTraceMetrics::Trace:: +getResourceLength(ArrayRef Extrablocks) const { + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) + Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; + // Assume issue width 1 without a schedule model. + return Instrs; +} + void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { OS << getName() << " ensemble:\n"; for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {