X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FScheduleDAGList.cpp;h=067407b1eb84e5f27bf13eb1cb69066df6b20c22;hb=b3bc6352defdf1a5c6b1b0770d0c4d603f6524a8;hp=9e6bd75475771fb6b7962e1380febb35ae8b89a5;hpb=49eee4a26f17b27d8949be3b6da7d9e75846be00;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index 9e6bd754757..067407b1eb8 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -2,15 +2,15 @@ // // The LLVM Compiler Infrastructure // -// This file was developed by Evan Cheng and is distributed under the -// University of Illinois Open Source License. See LICENSE.TXT for details. +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // -// This implements bottom-up and top-down list schedulers, using standard -// algorithms. The basic approach uses a priority queue of available nodes to -// schedule. One at a time, nodes are taken from the priority queue (thus in -// priority order), checked for legality to schedule, and emitted if legal. +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. // // Nodes may not be legal to schedule either due to structural hazards (e.g. // pipeline or resource constraints) or because an input to the instruction has @@ -18,534 +18,242 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sched" +#define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/Statistic.h" #include -#include -#include -#include -#include using namespace llvm; -namespace { - Statistic<> NumNoops ("scheduler", "Number of noops inserted"); - Statistic<> NumStalls("scheduler", "Number of pipeline stalls"); - - /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or - /// a group of nodes flagged together. - struct SUnit { - SDNode *Node; // Representative node. - std::vector FlaggedNodes; // All nodes flagged to Node. - std::set Preds; // All real predecessors. - std::set ChainPreds; // All chain predecessors. - std::set Succs; // All real successors. - std::set ChainSuccs; // All chain successors. - short NumPredsLeft; // # of preds not scheduled. - short NumSuccsLeft; // # of succs not scheduled. - short NumChainPredsLeft; // # of chain preds not scheduled. - short NumChainSuccsLeft; // # of chain succs not scheduled. - bool isTwoAddress : 1; // Is a two-address instruction. - bool isDefNUseOperand : 1; // Is a def&use operand. - unsigned short Latency; // Node latency. - unsigned CycleBound; // Upper/lower cycle to be scheduled at. - unsigned NodeNum; // Entry # of node in the node vector. - - SUnit(SDNode *node, unsigned nodenum) - : Node(node), NumPredsLeft(0), NumSuccsLeft(0), - NumChainPredsLeft(0), NumChainSuccsLeft(0), - isTwoAddress(false), isDefNUseOperand(false), - Latency(0), CycleBound(0), NodeNum(nodenum) {} - - void dump(const SelectionDAG *G, bool All=true) const; - }; -} - -void SUnit::dump(const SelectionDAG *G, bool All) const { - std::cerr << "SU: "; - Node->dump(G); - std::cerr << "\n"; - if (FlaggedNodes.size() != 0) { - for (unsigned i = 0, e = FlaggedNodes.size(); i != e; i++) { - std::cerr << " "; - FlaggedNodes[i]->dump(G); - std::cerr << "\n"; - } - } - - if (All) { - std::cerr << " # preds left : " << NumPredsLeft << "\n"; - std::cerr << " # succs left : " << NumSuccsLeft << "\n"; - std::cerr << " # chain preds left : " << NumChainPredsLeft << "\n"; - std::cerr << " # chain succs left : " << NumChainSuccsLeft << "\n"; - std::cerr << " Latency : " << Latency << "\n"; - - if (Preds.size() != 0) { - std::cerr << " Predecessors:\n"; - for (std::set::const_iterator I = Preds.begin(), - E = Preds.end(); I != E; ++I) { - std::cerr << " "; - (*I)->dump(G, false); - } - } - if (ChainPreds.size() != 0) { - std::cerr << " Chained Preds:\n"; - for (std::set::const_iterator I = ChainPreds.begin(), - E = ChainPreds.end(); I != E; ++I) { - std::cerr << " "; - (*I)->dump(G, false); - } - } - if (Succs.size() != 0) { - std::cerr << " Successors:\n"; - for (std::set::const_iterator I = Succs.begin(), - E = Succs.end(); I != E; ++I) { - std::cerr << " "; - (*I)->dump(G, false); - } - } - if (ChainSuccs.size() != 0) { - std::cerr << " Chained succs:\n"; - for (std::set::const_iterator I = ChainSuccs.begin(), - E = ChainSuccs.end(); I != E; ++I) { - std::cerr << " "; - (*I)->dump(G, false); - } - } - } -} +STATISTIC(NumNoops , "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); +static RegisterScheduler + tdListDAGScheduler("list-td", "Top-down list scheduler", + createTDListDAGScheduler); + namespace { - class SchedulingPriorityQueue; - - /// Sorting functions for the Available queue. - struct ls_rr_sort : public std::binary_function { - SchedulingPriorityQueue *SPQ; - ls_rr_sort(SchedulingPriorityQueue *spq) : SPQ(spq) {} - ls_rr_sort(const ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; - }; -} // end anonymous namespace - -namespace { - class SchedulingPriorityQueue { - // SUnits - The SUnits for the current graph. - std::vector &SUnits; - - // SethiUllmanNumbers - The SethiUllman number for each node. - std::vector SethiUllmanNumbers; - - std::priority_queue, ls_rr_sort> Queue; - public: - SchedulingPriorityQueue(std::vector &sunits) - : SUnits(sunits), Queue(ls_rr_sort(this)) { - // Calculate node priorities. - CalculatePriorities(); - } - - unsigned getSethiUllmanNumber(unsigned NodeNum) const { - assert(NodeNum < SethiUllmanNumbers.size()); - return SethiUllmanNumbers[NodeNum]; - } - - bool empty() const { return Queue.empty(); } - - void push(SUnit *U) { - Queue.push(U); - } - SUnit *pop() { - SUnit *V = Queue.top(); - Queue.pop(); - return V; - } - private: - void CalculatePriorities(); - int CalcNodePriority(SUnit *SU); - }; -} - -bool ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { - unsigned LeftNum = left->NodeNum; - unsigned RightNum = right->NodeNum; - - int LBonus = (int)left ->isDefNUseOperand; - int RBonus = (int)right->isDefNUseOperand; - - // Special tie breaker: if two nodes share a operand, the one that - // use it as a def&use operand is preferred. - if (left->isTwoAddress && !right->isTwoAddress) { - SDNode *DUNode = left->Node->getOperand(0).Val; - if (DUNode->isOperand(right->Node)) - LBonus++; - } - if (!left->isTwoAddress && right->isTwoAddress) { - SDNode *DUNode = right->Node->getOperand(0).Val; - if (DUNode->isOperand(left->Node)) - RBonus++; - } - - // Priority1 is just the number of live range genned. - int LPriority1 = left ->NumPredsLeft - LBonus; - int RPriority1 = right->NumPredsLeft - RBonus; - int LPriority2 = SPQ->getSethiUllmanNumber(LeftNum) + LBonus; - int RPriority2 = SPQ->getSethiUllmanNumber(RightNum) + RBonus; - - if (LPriority1 > RPriority1) - return true; - else if (LPriority1 == RPriority1) - if (LPriority2 < RPriority2) - return true; - else if (LPriority2 == RPriority2) - if (left->CycleBound > right->CycleBound) - return true; - - return false; -} - - -/// CalcNodePriority - Priority is the Sethi Ullman number. -/// Smaller number is the higher priority. -int SchedulingPriorityQueue::CalcNodePriority(SUnit *SU) { - int &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; - if (SethiUllmanNumber != INT_MIN) - return SethiUllmanNumber; - - if (SU->Preds.size() == 0) { - SethiUllmanNumber = 1; - } else { - int Extra = 0; - for (std::set::iterator I = SU->Preds.begin(), - E = SU->Preds.end(); I != E; ++I) { - SUnit *PredSU = *I; - int PredSethiUllman = CalcNodePriority(PredSU); - if (PredSethiUllman > SethiUllmanNumber) { - SethiUllmanNumber = PredSethiUllman; - Extra = 0; - } else if (PredSethiUllman == SethiUllmanNumber) - Extra++; - } - - if (SU->Node->getOpcode() != ISD::TokenFactor) - SethiUllmanNumber += Extra; - else - SethiUllmanNumber = (Extra == 1) ? 0 : Extra-1; - } - - return SethiUllmanNumber; -} - -/// CalculatePriorities - Calculate priorities of all scheduling units. -void SchedulingPriorityQueue::CalculatePriorities() { - SethiUllmanNumbers.assign(SUnits.size(), INT_MIN); - - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - // FIXME: assumes uniform latency for now. - SUnits[i].Latency = 1; - (void)CalcNodePriority(&SUnits[i]); - } -} - - - - -namespace { -/// ScheduleDAGList - List scheduler. -class ScheduleDAGList : public ScheduleDAG { +//===----------------------------------------------------------------------===// +/// ScheduleDAGList - The actual list scheduler implementation. This supports +/// top-down scheduling. +/// +class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAG { private: - // SDNode to SUnit mapping (many to one). - std::map SUnitMap; - // The schedule. Null SUnit*'s represent noop instructions. - std::vector Sequence; - // Current scheduling cycle. - unsigned CurrCycle; + /// AvailableQueue - The priority queue to use for the available SUnits. + /// + SchedulingPriorityQueue *AvailableQueue; - // The scheduling units. - std::vector SUnits; + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands becomes available, the instruction is + /// added to the AvailableQueue. This keeps track of each SUnit and the + /// number of cycles left to execute before the operation is available. + std::vector > PendingQueue; - /// isBottomUp - This is true if the scheduling problem is bottom-up, false if - /// it is top-down. - bool isBottomUp; - /// HazardRec - The hazard recognizer to use. HazardRecognizer *HazardRec; - + public: ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb, - const TargetMachine &tm, bool isbottomup, + const TargetMachine &tm, + SchedulingPriorityQueue *availqueue, HazardRecognizer *HR) - : ScheduleDAG(listSchedulingBURR, dag, bb, tm), - CurrCycle(0), isBottomUp(isbottomup), HazardRec(HR) { + : ScheduleDAG(dag, bb, tm), + AvailableQueue(availqueue), HazardRec(HR) { } ~ScheduleDAGList() { delete HazardRec; + delete AvailableQueue; } void Schedule(); - void dump() const; - private: - SUnit *NewSUnit(SDNode *N); - void ReleasePred(SchedulingPriorityQueue &Avail, - SUnit *PredSU, bool isChain = false); - void ReleaseSucc(SchedulingPriorityQueue &Avail, - SUnit *SuccSU, bool isChain = false); - void ScheduleNodeBottomUp(SchedulingPriorityQueue &Avail, SUnit *SU); - void ScheduleNodeTopDown(SchedulingPriorityQueue &Avail, SUnit *SU); - void ListScheduleTopDown(SchedulingPriorityQueue &Available); - void ListScheduleBottomUp(SchedulingPriorityQueue &Available); - void BuildSchedUnits(); - void EmitSchedule(); + void ReleaseSucc(SUnit *SuccSU, bool isChain); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void ListScheduleTopDown(); }; } // end anonymous namespace HazardRecognizer::~HazardRecognizer() {} -/// NewSUnit - Creates a new SUnit and return a ptr to it. -SUnit *ScheduleDAGList::NewSUnit(SDNode *N) { - SUnits.push_back(SUnit(N, SUnits.size())); - return &SUnits.back(); -} +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGList::Schedule() { + DOUT << "********** List Scheduling **********\n"; + + // Build scheduling units. + BuildSchedUnits(); -/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to -/// the Available queue is the count reaches zero. Also update its cycle bound. -void ScheduleDAGList::ReleasePred(SchedulingPriorityQueue &Available, - SUnit *PredSU, bool isChain) { - // FIXME: the distance between two nodes is not always == the predecessor's - // latency. For example, the reader can very well read the register written - // by the predecessor later than the issue cycle. It also depends on the - // interrupt model (drain vs. freeze). - PredSU->CycleBound = std::max(PredSU->CycleBound,CurrCycle + PredSU->Latency); - - if (!isChain) - PredSU->NumSuccsLeft--; - else - PredSU->NumChainSuccsLeft--; + AvailableQueue->initNodes(SUnits); -#ifndef NDEBUG - if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) { - std::cerr << "*** List scheduling failed! ***\n"; - PredSU->dump(&DAG); - std::cerr << " has been released too many times!\n"; - assert(0); - } -#endif + ListScheduleTopDown(); - if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) { - // EntryToken has to go last! Special case it here. - if (PredSU->Node->getOpcode() != ISD::EntryToken) - Available.push(PredSU); - } + AvailableQueue->releaseState(); } +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to -/// the Available queue is the count reaches zero. Also update its cycle bound. -void ScheduleDAGList::ReleaseSucc(SchedulingPriorityQueue &Available, - SUnit *SuccSU, bool isChain) { - // FIXME: the distance between two nodes is not always == the predecessor's - // latency. For example, the reader can very well read the register written - // by the predecessor later than the issue cycle. It also depends on the - // interrupt model (drain vs. freeze). - SuccSU->CycleBound = std::max(SuccSU->CycleBound,CurrCycle + SuccSU->Latency); +/// the PendingQueue if the count reaches zero. +void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) { + SuccSU->NumPredsLeft--; - if (!isChain) - SuccSU->NumPredsLeft--; - else - SuccSU->NumChainPredsLeft--; + assert(SuccSU->NumPredsLeft >= 0 && + "List scheduling internal error"); -#ifndef NDEBUG - if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) { - std::cerr << "*** List scheduling failed! ***\n"; - SuccSU->dump(&DAG); - std::cerr << " has been released too many times!\n"; - abort(); + if (SuccSU->NumPredsLeft == 0) { + // Compute how many cycles it will be before this actually becomes + // available. This is the max of the start time of all predecessors plus + // their latencies. + unsigned AvailableCycle = 0; + for (SUnit::pred_iterator I = SuccSU->Preds.begin(), + E = SuccSU->Preds.end(); I != E; ++I) { + // If this is a token edge, we don't need to wait for the latency of the + // preceeding instruction (e.g. a long-latency load) unless there is also + // some other data dependence. + SUnit &Pred = *I->Dep; + unsigned PredDoneCycle = Pred.Cycle; + if (!I->isCtrl) + PredDoneCycle += Pred.Latency; + else if (Pred.Latency) + PredDoneCycle += 1; + + AvailableCycle = std::max(AvailableCycle, PredDoneCycle); + } + + PendingQueue.push_back(std::make_pair(AvailableCycle, SuccSU)); } -#endif - - if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) - Available.push(SuccSU); -} - -/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending -/// count of its predecessors. If a predecessor pending count is zero, add it to -/// the Available queue. -void ScheduleDAGList::ScheduleNodeBottomUp(SchedulingPriorityQueue &Available, - SUnit *SU) { - DEBUG(std::cerr << "*** Scheduling: "); - DEBUG(SU->dump(&DAG, false)); - - Sequence.push_back(SU); - - // Bottom up: release predecessors - for (std::set::iterator I1 = SU->Preds.begin(), - E1 = SU->Preds.end(); I1 != E1; ++I1) { - ReleasePred(Available, *I1); - SU->NumPredsLeft--; - } - for (std::set::iterator I2 = SU->ChainPreds.begin(), - E2 = SU->ChainPreds.end(); I2 != E2; ++I2) - ReleasePred(Available, *I2, true); - - CurrCycle++; } /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. -void ScheduleDAGList::ScheduleNodeTopDown(SchedulingPriorityQueue &Available, - SUnit *SU) { - DEBUG(std::cerr << "*** Scheduling: "); - DEBUG(SU->dump(&DAG, false)); +void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(SU->dump(&DAG)); Sequence.push_back(SU); + SU->Cycle = CurCycle; // Bottom up: release successors. - for (std::set::iterator I1 = SU->Succs.begin(), - E1 = SU->Succs.end(); I1 != E1; ++I1) { - ReleaseSucc(Available, *I1); - SU->NumSuccsLeft--; - } - for (std::set::iterator I2 = SU->ChainSuccs.begin(), - E2 = SU->ChainSuccs.end(); I2 != E2; ++I2) - ReleaseSucc(Available, *I2, true); - - CurrCycle++; -} - -/// isReady - True if node's lower cycle bound is less or equal to the current -/// scheduling cycle. Always true if all nodes have uniform latency 1. -static inline bool isReady(SUnit *SU, unsigned CurrCycle) { - return SU->CycleBound <= CurrCycle; -} - -/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up -/// schedulers. -void ScheduleDAGList::ListScheduleBottomUp(SchedulingPriorityQueue &Available) { - // Add root to Available queue. - Available.push(SUnitMap[DAG.getRoot().Val]); - - // While Available queue is not empty, grab the node with the highest - // priority. If it is not ready put it back. Schedule the node. - std::vector NotReady; - while (!Available.empty()) { - SUnit *CurrNode = Available.pop(); - - while (!isReady(CurrNode, CurrCycle)) { - NotReady.push_back(CurrNode); - CurrNode = Available.pop(); - } - - // Add the nodes that aren't ready back onto the available list. - while (!NotReady.empty()) { - Available.push(NotReady.back()); - NotReady.pop_back(); - } - - ScheduleNodeBottomUp(Available, CurrNode); - } - - // Add entry node last - if (DAG.getEntryNode().Val != DAG.getRoot().Val) { - SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; - Sequence.push_back(Entry); - } - - // Reverse the order if it is bottom up. - std::reverse(Sequence.begin(), Sequence.end()); - - -#ifndef NDEBUG - // Verify that all SUnits were scheduled. - bool AnyNotSched = false; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) { - if (!AnyNotSched) - std::cerr << "*** List scheduling failed! ***\n"; - SUnits[i].dump(&DAG); - std::cerr << "has not been scheduled!\n"; - AnyNotSched = true; - } - } - assert(!AnyNotSched); -#endif + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + ReleaseSucc(I->Dep, I->isCtrl); } /// ListScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. -void ScheduleDAGList::ListScheduleTopDown(SchedulingPriorityQueue &Available) { - // Emit the entry node first. - SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; - ScheduleNodeTopDown(Available, Entry); - HazardRec->EmitInstruction(Entry->Node); - +void ScheduleDAGList::ListScheduleTopDown() { + unsigned CurCycle = 0; + // All leaves to Available queue. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { // It is available if it has no predecessors. - if ((SUnits[i].Preds.size() + SUnits[i].ChainPreds.size()) == 0 && - &SUnits[i] != Entry) - Available.push(&SUnits[i]); + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = SUnits[i].isPending = true; + } } // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector NotReady; - while (!Available.empty()) { - SUnit *FoundNode = 0; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i].first == CurCycle) { + AvailableQueue->push(PendingQueue[i].second); + PendingQueue[i].second->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } else { + assert(PendingQueue[i].first > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + ++CurCycle; + continue; + } + SUnit *FoundSUnit = 0; + SDNode *FoundNode = 0; + bool HasNoopHazards = false; - do { - SUnit *CurNode = Available.pop(); + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); // Get the node represented by this SUnit. - SDNode *N = CurNode->Node; + FoundNode = CurSUnit->Node; + // If this is a pseudo op, like copyfromreg, look to see if there is a // real target node flagged to it. If so, use the target node. - for (unsigned i = 0, e = CurNode->FlaggedNodes.size(); - N->getOpcode() < ISD::BUILTIN_OP_END && i != e; ++i) - N = CurNode->FlaggedNodes[i]; + for (unsigned i = 0, e = CurSUnit->FlaggedNodes.size(); + !FoundNode->isMachineOpcode() && i != e; ++i) + FoundNode = CurSUnit->FlaggedNodes[i]; - HazardRecognizer::HazardType HT = HazardRec->getHazardType(N); + HazardRecognizer::HazardType HT = HazardRec->getHazardType(FoundNode); if (HT == HazardRecognizer::NoHazard) { - FoundNode = CurNode; + FoundSUnit = CurSUnit; break; } // Remember if this is a noop hazard. HasNoopHazards |= HT == HazardRecognizer::NoopHazard; - NotReady.push_back(CurNode); - } while (!Available.empty()); + NotReady.push_back(CurSUnit); + } // Add the nodes that aren't ready back onto the available list. - while (!NotReady.empty()) { - Available.push(NotReady.back()); - NotReady.pop_back(); + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); } // If we found a node to schedule, do it now. - if (FoundNode) { - ScheduleNodeTopDown(Available, FoundNode); - HazardRec->EmitInstruction(FoundNode->Node); + if (FoundSUnit) { + ScheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundNode); + FoundSUnit->isScheduled = true; + AvailableQueue->ScheduledNode(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, just advance // the current cycle and try again. - DEBUG(std::cerr << "*** Advancing cycle, no work to do\n"); + DOUT << "*** Advancing cycle, no work to do\n"; HazardRec->AdvanceCycle(); ++NumStalls; + ++CurCycle; } else { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. - DEBUG(std::cerr << "*** Emitting noop\n"); + DOUT << "*** Emitting noop\n"; HazardRec->EmitNoop(); Sequence.push_back(0); // NULL SUnit* -> noop ++NumNoops; + ++CurCycle; } } @@ -553,11 +261,11 @@ void ScheduleDAGList::ListScheduleTopDown(SchedulingPriorityQueue &Available) { // Verify that all SUnits were scheduled. bool AnyNotSched = false; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - if (SUnits[i].NumPredsLeft != 0 || SUnits[i].NumChainPredsLeft != 0) { + if (SUnits[i].NumPredsLeft != 0) { if (!AnyNotSched) - std::cerr << "*** List scheduling failed! ***\n"; + cerr << "*** List scheduling failed! ***\n"; SUnits[i].dump(&DAG); - std::cerr << "has not been scheduled!\n"; + cerr << "has not been scheduled!\n"; AnyNotSched = true; } } @@ -565,168 +273,276 @@ void ScheduleDAGList::ListScheduleTopDown(SchedulingPriorityQueue &Available) { #endif } - -void ScheduleDAGList::BuildSchedUnits() { - // Reserve entries in the vector for each of the SUnits we are creating. This - // ensure that reallocation of the vector won't happen, so SUnit*'s won't get - // invalidated. - SUnits.reserve(NodeCount); +//===----------------------------------------------------------------------===// +// LatencyPriorityQueue Implementation +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using latency information to +// reduce the length of the critical path through the basic block. +// +namespace { + class LatencyPriorityQueue; - // Pass 1: create the SUnit's. - for (unsigned i = 0, NC = NodeCount; i < NC; i++) { - NodeInfo *NI = &Info[i]; - SDNode *N = NI->Node; - if (isPassiveNode(N)) - continue; + /// Sorting functions for the Available queue. + struct latency_sort : public std::binary_function { + LatencyPriorityQueue *PQ; + latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {} + latency_sort(const latency_sort &RHS) : PQ(RHS.PQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; + }; +} // end anonymous namespace - SUnit *SU; - if (NI->isInGroup()) { - if (NI != NI->Group->getBottom()) // Bottom up, so only look at bottom - continue; // node of the NodeGroup - - SU = NewSUnit(N); - // Find the flagged nodes. - SDOperand FlagOp = N->getOperand(N->getNumOperands() - 1); - SDNode *Flag = FlagOp.Val; - unsigned ResNo = FlagOp.ResNo; - while (Flag->getValueType(ResNo) == MVT::Flag) { - NodeInfo *FNI = getNI(Flag); - assert(FNI->Group == NI->Group); - SU->FlaggedNodes.insert(SU->FlaggedNodes.begin(), Flag); - SUnitMap[Flag] = SU; - - FlagOp = Flag->getOperand(Flag->getNumOperands() - 1); - Flag = FlagOp.Val; - ResNo = FlagOp.ResNo; - } - } else { - SU = NewSUnit(N); +namespace { + class LatencyPriorityQueue : public SchedulingPriorityQueue { + // SUnits - The SUnits for the current graph. + std::vector *SUnits; + + // Latencies - The latency (max of latency from this node to the bb exit) + // for each node. + std::vector Latencies; + + /// NumNodesSolelyBlocking - This vector contains, for every node in the + /// Queue, the number of nodes that the node is the sole unscheduled + /// predecessor for. This is used as a tie-breaker heuristic for better + /// mobility. + std::vector NumNodesSolelyBlocking; + + PriorityQueue, latency_sort> Queue; +public: + LatencyPriorityQueue() : Queue(latency_sort(this)) { + } + + void initNodes(std::vector &sunits) { + SUnits = &sunits; + // Calculate node priorities. + CalculatePriorities(); } - SUnitMap[N] = SU; - } - // Pass 2: add the preds, succs, etc. - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; - SDNode *N = SU->Node; - NodeInfo *NI = getNI(N); + void addNode(const SUnit *SU) { + Latencies.resize(SUnits->size(), -1); + NumNodesSolelyBlocking.resize(SUnits->size(), 0); + CalcLatency(*SU); + } + + void updateNode(const SUnit *SU) { + Latencies[SU->NodeNum] = -1; + CalcLatency(*SU); + } + + void releaseState() { + SUnits = 0; + Latencies.clear(); + } - if (N->isTargetOpcode() && TII->isTwoAddrInstr(N->getTargetOpcode())) - SU->isTwoAddress = true; - - if (NI->isInGroup()) { - // Find all predecessors (of the group). - NodeGroupOpIterator NGOI(NI); - while (!NGOI.isEnd()) { - SDOperand Op = NGOI.next(); - SDNode *OpN = Op.Val; - MVT::ValueType VT = OpN->getValueType(Op.ResNo); - NodeInfo *OpNI = getNI(OpN); - if (OpNI->Group != NI->Group && !isPassiveNode(OpN)) { - assert(VT != MVT::Flag); - SUnit *OpSU = SUnitMap[OpN]; - if (VT == MVT::Other) { - if (SU->ChainPreds.insert(OpSU).second) - SU->NumChainPredsLeft++; - if (OpSU->ChainSuccs.insert(SU).second) - OpSU->NumChainSuccsLeft++; - } else { - if (SU->Preds.insert(OpSU).second) - SU->NumPredsLeft++; - if (OpSU->Succs.insert(SU).second) - OpSU->NumSuccsLeft++; - } - } - } - } else { - // Find node predecessors. - for (unsigned j = 0, e = N->getNumOperands(); j != e; j++) { - SDOperand Op = N->getOperand(j); - SDNode *OpN = Op.Val; - MVT::ValueType VT = OpN->getValueType(Op.ResNo); - if (!isPassiveNode(OpN)) { - assert(VT != MVT::Flag); - SUnit *OpSU = SUnitMap[OpN]; - if (VT == MVT::Other) { - if (SU->ChainPreds.insert(OpSU).second) - SU->NumChainPredsLeft++; - if (OpSU->ChainSuccs.insert(SU).second) - OpSU->NumChainSuccsLeft++; - } else { - if (SU->Preds.insert(OpSU).second) - SU->NumPredsLeft++; - if (OpSU->Succs.insert(SU).second) - OpSU->NumSuccsLeft++; - if (j == 0 && SU->isTwoAddress) - OpSU->isDefNUseOperand = true; - } - } - } + unsigned getLatency(unsigned NodeNum) const { + assert(NodeNum < Latencies.size()); + return Latencies[NodeNum]; } - } + + unsigned getNumSolelyBlockNodes(unsigned NodeNum) const { + assert(NodeNum < NumNodesSolelyBlocking.size()); + return NumNodesSolelyBlocking[NodeNum]; + } + + unsigned size() const { return Queue.size(); } + + bool empty() const { return Queue.empty(); } + + virtual void push(SUnit *U) { + push_impl(U); + } + void push_impl(SUnit *U); + + void push_all(const std::vector &Nodes) { + for (unsigned i = 0, e = Nodes.size(); i != e; ++i) + push_impl(Nodes[i]); + } + + SUnit *pop() { + if (empty()) return NULL; + SUnit *V = Queue.top(); + Queue.pop(); + return V; + } + + void remove(SUnit *SU) { + assert(!Queue.empty() && "Not in queue!"); + Queue.erase_one(SU); + } + + // ScheduledNode - As nodes are scheduled, we look to see if there are any + // successor nodes that have a single unscheduled predecessor. If so, that + // single predecessor has a higher priority, since scheduling it will make + // the node available. + void ScheduledNode(SUnit *Node); + +private: + void CalculatePriorities(); + int CalcLatency(const SUnit &SU); + void AdjustPriorityOfUnscheduledPreds(SUnit *SU); + SUnit *getSingleUnscheduledPred(SUnit *SU); + }; } -/// EmitSchedule - Emit the machine code in scheduled order. -void ScheduleDAGList::EmitSchedule() { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) { - for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++) { - SDNode *N = SU->FlaggedNodes[j]; - EmitNode(getNI(N)); +bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return LHSNum < RHSNum; +} + + +/// CalcNodePriority - Calculate the maximal path from the node to the exit. +/// +int LatencyPriorityQueue::CalcLatency(const SUnit &SU) { + int &Latency = Latencies[SU.NodeNum]; + if (Latency != -1) + return Latency; + + std::vector WorkList; + WorkList.push_back(&SU); + while (!WorkList.empty()) { + const SUnit *Cur = WorkList.back(); + bool AllDone = true; + int MaxSuccLatency = 0; + for (SUnit::const_succ_iterator I = Cur->Succs.begin(),E = Cur->Succs.end(); + I != E; ++I) { + int SuccLatency = Latencies[I->Dep->NodeNum]; + if (SuccLatency == -1) { + AllDone = false; + WorkList.push_back(I->Dep); + } else { + MaxSuccLatency = std::max(MaxSuccLatency, SuccLatency); } - EmitNode(getNI(SU->Node)); - } else { - // Null SUnit* is a noop. - EmitNoop(); + } + if (AllDone) { + Latencies[Cur->NodeNum] = MaxSuccLatency + Cur->Latency; + WorkList.pop_back(); } } + + return Latency; } -/// dump - dump the schedule. -void ScheduleDAGList::dump() const { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - SU->dump(&DAG, false); - else - std::cerr << "**** NOOP ****\n"; +/// CalculatePriorities - Calculate priorities of all scheduling units. +void LatencyPriorityQueue::CalculatePriorities() { + Latencies.assign(SUnits->size(), -1); + NumNodesSolelyBlocking.assign(SUnits->size(), 0); + + // For each node, calculate the maximal path from the node to the exit. + std::vector > WorkList; + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + const SUnit *SU = &(*SUnits)[i]; + if (SU->Succs.empty()) + WorkList.push_back(std::make_pair(SU, 0U)); + } + + while (!WorkList.empty()) { + const SUnit *SU = WorkList.back().first; + unsigned SuccLat = WorkList.back().second; + WorkList.pop_back(); + int &Latency = Latencies[SU->NodeNum]; + if (Latency == -1 || (SU->Latency + SuccLat) > (unsigned)Latency) { + Latency = SU->Latency + SuccLat; + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) + WorkList.push_back(std::make_pair(I->Dep, Latency)); + } } } -/// Schedule - Schedule the DAG using list scheduling. -/// FIXME: Right now it only supports the burr (bottom up register reducing) -/// heuristic. -void ScheduleDAGList::Schedule() { - DEBUG(std::cerr << "********** List Scheduling **********\n"); +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->Dep; + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + + return OnlyAvailablePred; +} - // Build scheduling units. - BuildSchedUnits(); +void LatencyPriorityQueue::push_impl(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. + unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->Dep) == SU) + ++NumNodesBlocking; + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; - SchedulingPriorityQueue PQ(SUnits); - - // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. - if (isBottomUp) - ListScheduleBottomUp(PQ); - else - ListScheduleTopDown(PQ); + Queue.push(SU); +} + + +// ScheduledNode - As nodes are scheduled, we look to see if there are any +// successor nodes that have a single unscheduled predecessor. If so, that +// single predecessor has a higher priority, since scheduling it will make +// the node available. +void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + AdjustPriorityOfUnscheduledPreds(I->Dep); +} + +/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isPending) return; // All preds scheduled. - DEBUG(std::cerr << "*** Final schedule ***\n"); - DEBUG(dump()); - DEBUG(std::cerr << "\n"); + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return; - // Emit in scheduled order - EmitSchedule(); -} + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); -llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAG &DAG, - MachineBasicBlock *BB) { - return new ScheduleDAGList(DAG, BB, DAG.getTarget(), true, - new HazardRecognizer()); + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. + push(OnlyAvailablePred); } -/// createTDListDAGScheduler - This creates a top-down list scheduler with the -/// specified hazard recognizer. -ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAG &DAG, - MachineBasicBlock *BB, - HazardRecognizer *HR) { - return new ScheduleDAGList(DAG, BB, DAG.getTarget(), false, HR); + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createTDListDAGScheduler - This creates a top-down list scheduler with a +/// new hazard recognizer. This scheduler takes ownership of the hazard +/// recognizer and deletes it when done. +ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAGISel *IS, + SelectionDAG *DAG, + MachineBasicBlock *BB, bool Fast) { + return new ScheduleDAGList(*DAG, BB, DAG->getTarget(), + new LatencyPriorityQueue(), + IS->CreateTargetHazardRecognizer()); }