1 //===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Evan Cheng and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements bottom-up and top-down list schedulers, using standard
11 // algorithms. The basic approach uses a priority queue of available nodes to
12 // schedule. One at a time, nodes are taken from the priority queue (thus in
13 // priority order), checked for legality to schedule, and emitted if legal.
15 // Nodes may not be legal to schedule either due to structural hazards (e.g.
16 // pipeline or resource constraints) or because an input to the instruction has
17 // not completed execution.
19 //===----------------------------------------------------------------------===//
21 #define DEBUG_TYPE "sched"
22 #include "llvm/CodeGen/ScheduleDAG.h"
23 #include "llvm/CodeGen/SSARegMap.h"
24 #include "llvm/Target/MRegisterInfo.h"
25 #include "llvm/Target/TargetMachine.h"
26 #include "llvm/Target/TargetInstrInfo.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/ADT/Statistic.h"
34 #include "llvm/Support/CommandLine.h"
39 SchedVertically("sched-vertically", cl::Hidden);
43 Statistic<> NumNoops ("scheduler", "Number of noops inserted");
44 Statistic<> NumStalls("scheduler", "Number of pipeline stalls");
46 /// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
47 /// a group of nodes flagged together.
/// NOTE(review): the enclosing 'struct SUnit {' header is not visible in this
/// excerpt; the members below are its body.
49 SDNode *Node; // Representative node.
50 std::vector<SDNode*> FlaggedNodes; // All nodes flagged to Node.
52 // Preds/Succs - The SUnits before/after us in the graph. The boolean value
53 // is true if the edge is a token chain edge, false if it is a value edge.
54 std::set<std::pair<SUnit*,bool> > Preds; // All sunit predecessors.
55 std::set<std::pair<SUnit*,bool> > Succs; // All sunit successors.
// Dependence counters; scheduling a neighbor decrements these, and a unit
// becomes available when both relevant counters reach zero.
57 short NumPredsLeft; // # of preds not scheduled.
58 short NumSuccsLeft; // # of succs not scheduled.
59 short NumChainPredsLeft; // # of chain preds not scheduled.
60 short NumChainSuccsLeft; // # of chain succs not scheduled.
61 bool isTwoAddress : 1; // Is a two-address instruction.
62 bool isDefNUseOperand : 1; // Is a def&use operand.
63 bool isPending : 1; // True once pending.
64 bool isAvailable : 1; // True once available.
65 bool isScheduled : 1; // True once scheduled.
66 unsigned short Latency; // Node latency.
67 unsigned CycleBound; // Upper/lower cycle to be scheduled at.
68 unsigned Cycle; // Once scheduled, the cycle of the op.
69 unsigned NodeNum; // Entry # of node in the node vector.
// Constructor: all counts zeroed and all flags cleared.
71 SUnit(SDNode *node, unsigned nodenum)
72 : Node(node), NumPredsLeft(0), NumSuccsLeft(0),
73 NumChainPredsLeft(0), NumChainSuccsLeft(0),
74 isTwoAddress(false), isDefNUseOperand(false),
75 isPending(false), isAvailable(false), isScheduled(false),
76 Latency(0), CycleBound(0), Cycle(0), NodeNum(nodenum) {}
78 void dump(const SelectionDAG *G) const;
79 void dumpAll(const SelectionDAG *G) const;
// dump - Print this SUnit's id and representative node, then any nodes
// flagged (glued) to it.
83 void SUnit::dump(const SelectionDAG *G) const {
84 std::cerr << "SU(" << NodeNum << "): ";
87 if (FlaggedNodes.size() != 0) {
88 for (unsigned i = 0, e = FlaggedNodes.size(); i != e; i++) {
90 FlaggedNodes[i]->dump(G);
// dumpAll - Print this SUnit together with its remaining dependence counts,
// latency, and the predecessor/successor edge lists (chain vs. value edges).
96 void SUnit::dumpAll(const SelectionDAG *G) const {
99 std::cerr << " # preds left : " << NumPredsLeft << "\n";
100 std::cerr << " # succs left : " << NumSuccsLeft << "\n";
101 std::cerr << " # chain preds left : " << NumChainPredsLeft << "\n";
102 std::cerr << " # chain succs left : " << NumChainSuccsLeft << "\n";
103 std::cerr << " Latency : " << Latency << "\n";
105 if (Preds.size() != 0) {
106 std::cerr << " Predecessors:\n";
107 for (std::set<std::pair<SUnit*,bool> >::const_iterator I = Preds.begin(),
108 E = Preds.end(); I != E; ++I) {
112 std::cerr << " val ";
116 if (Succs.size() != 0) {
117 std::cerr << " Successors:\n";
118 for (std::set<std::pair<SUnit*, bool> >::const_iterator I = Succs.begin(),
119 E = Succs.end(); I != E; ++I) {
123 std::cerr << " val ";
130 //===----------------------------------------------------------------------===//
131 /// SchedulingPriorityQueue - This interface is used to plug different
132 /// priorities computation algorithms into the list scheduler. It implements the
133 /// interface of a standard priority queue, where nodes are inserted in
134 /// arbitrary order and returned in priority order. The computation of the
135 /// priority and the representation of the queue are totally up to the
136 /// implementation to decide.
139 class SchedulingPriorityQueue {
141 virtual ~SchedulingPriorityQueue() {}
// initNodes/releaseState bracket one scheduling region: set up per-node
// priority state, then drop it when the region is done.
143 virtual void initNodes(const std::vector<SUnit> &SUnits) = 0;
144 virtual void releaseState() = 0;
146 virtual bool empty() const = 0;
147 virtual void push(SUnit *U) = 0;
// push_all - Bulk re-insertion, used to return not-ready nodes to the queue.
149 virtual void push_all(const std::vector<SUnit *> &Nodes) = 0;
150 virtual SUnit *pop() = 0;
152 virtual void RemoveFromPriorityQueue(SUnit *SU) = 0;
154 /// ScheduledNode - As each node is scheduled, this method is invoked. This
155 /// allows the priority function to adjust the priority of nodes that have
156 /// already been emitted.
157 virtual void ScheduledNode(SUnit *Node) {}
164 //===----------------------------------------------------------------------===//
165 /// ScheduleDAGList - The actual list scheduler implementation. This supports
166 /// both top-down and bottom-up scheduling.
168 class ScheduleDAGList : public ScheduleDAG {
170 // SDNode to SUnit mapping (many to one).
171 std::map<SDNode*, SUnit*> SUnitMap;
173 // The schedule. Null SUnit*'s represent noop instructions.
174 std::vector<SUnit*> Sequence;
176 // The scheduling units.
177 std::vector<SUnit> SUnits;
179 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
183 /// AvailableQueue - The priority queue to use for the available SUnits.
185 SchedulingPriorityQueue *AvailableQueue;
187 /// PendingQueue - This contains all of the instructions whose operands have
188 /// been issued, but their results are not ready yet (due to the latency of
189 /// the operation). Once the operands become available, the instruction is
190 /// added to the AvailableQueue. This keeps track of each SUnit and the
191 /// number of cycles left to execute before the operation is available.
192 std::vector<std::pair<unsigned, SUnit*> > PendingQueue;
194 /// HazardRec - The hazard recognizer to use.
195 HazardRecognizer *HazardRec;
197 /// OpenNodes - Nodes with open live ranges, i.e. predecessors or successors
198 /// of scheduled nodes which are not themselves scheduled.
199 std::map<const TargetRegisterClass*, std::set<SUnit*> > OpenNodes;
// RegPressureLimits - Rough per-register-class limits consulted by the
// vertical-scheduling heuristic before lengthening live ranges.
201 std::map<const TargetRegisterClass*, unsigned> RegPressureLimits;
204 ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb,
205 const TargetMachine &tm, bool isbottomup,
206 SchedulingPriorityQueue *availqueue,
207 HazardRecognizer *HR)
208 : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
209 AvailableQueue(availqueue), HazardRec(HR) {
// This class owns AvailableQueue; the (elided) destructor deletes it.
214 delete AvailableQueue;
219 void dumpSchedule() const;
222 SUnit *NewSUnit(SDNode *N);
223 void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle);
224 void ReleaseSucc(SUnit *SuccSU, bool isChain);
// NOTE(review): 'Veritical' looks like a typo for 'Vertical'; the definition
// below spells the parameter 'Vertical'. Harmless (names need not match).
225 void ScheduleNodeBottomUp(SUnit *SU, unsigned& CurCycle, bool Veritical=true);
226 void ScheduleVertically(SUnit *SU, unsigned& CurCycle);
227 void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
228 void ListScheduleTopDown();
229 void ListScheduleBottomUp();
230 void BuildSchedUnits();
233 } // end anonymous namespace
// Out-of-line definition anchors HazardRecognizer's vtable in this file.
235 HazardRecognizer::~HazardRecognizer() {}
238 /// NewSUnit - Creates a new SUnit and returns a ptr to it. Valid only because
239 /// SUnits was reserved up front, so push_back never reallocates.
239 SUnit *ScheduleDAGList::NewSUnit(SDNode *N) {
240 SUnits.push_back(SUnit(N, SUnits.size()));
241 return &SUnits.back();
244 /// BuildSchedUnits - Build SUnits from the selection dag that we are given as
245 /// input. This SUnit graph is similar to the SelectionDAG, but represents
246 /// flagged together nodes with a single SUnit.
247 void ScheduleDAGList::BuildSchedUnits() {
248 // Reserve entries in the vector for each of the SUnits we are creating. This
249 // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
251 SUnits.reserve(std::distance(DAG.allnodes_begin(), DAG.allnodes_end()));
253 const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
// Pass 1: create one SUnit per flag-connected group of nodes and compute
// each group's latency.
255 for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(),
256 E = DAG.allnodes_end(); NI != E; ++NI) {
257 if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
260 // If this node has already been processed, stop now.
261 if (SUnitMap[NI]) continue;
263 SUnit *NodeSUnit = NewSUnit(NI);
265 // See if anything is flagged to this node, if so, add them to flagged
266 // nodes. Nodes can have at most one flag input and one flag output. Flags
267 // are required to be the last operand and result of a node.
269 // Scan up, adding flagged preds to FlaggedNodes.
271 while (N->getNumOperands() &&
272 N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
273 N = N->getOperand(N->getNumOperands()-1).Val;
274 NodeSUnit->FlaggedNodes.push_back(N);
275 SUnitMap[N] = NodeSUnit;
278 // Scan down, adding this node and any flagged succs to FlaggedNodes if they
279 // have a user of the flag operand.
281 while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
282 SDOperand FlagVal(N, N->getNumValues()-1);
284 // There are either zero or one users of the Flag result.
285 bool HasFlagUse = false;
286 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
288 if (FlagVal.isOperand(*UI)) {
290 NodeSUnit->FlaggedNodes.push_back(N);
291 SUnitMap[N] = NodeSUnit;
295 if (!HasFlagUse) break;
298 // Now all flagged nodes are in FlaggedNodes and N is the bottom-most node.
301 SUnitMap[N] = NodeSUnit;
303 // Compute the latency for the node. We use the sum of the latencies for
304 // all nodes flagged together into this SUnit.
305 if (InstrItins.isEmpty()) {
306 // No latency information.
307 NodeSUnit->Latency = 1;
309 NodeSUnit->Latency = 0;
310 if (N->isTargetOpcode()) {
311 unsigned SchedClass = TII->getSchedClass(N->getTargetOpcode());
312 InstrStage *S = InstrItins.begin(SchedClass);
313 InstrStage *E = InstrItins.end(SchedClass);
315 NodeSUnit->Latency += S->Cycles;
317 for (unsigned i = 0, e = NodeSUnit->FlaggedNodes.size(); i != e; ++i) {
318 SDNode *FNode = NodeSUnit->FlaggedNodes[i];
319 if (FNode->isTargetOpcode()) {
320 unsigned SchedClass = TII->getSchedClass(FNode->getTargetOpcode());
321 InstrStage *S = InstrItins.begin(SchedClass);
322 InstrStage *E = InstrItins.end(SchedClass);
324 NodeSUnit->Latency += S->Cycles;
330 // Pass 2: add the preds, succs, etc.
331 for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
332 SUnit *SU = &SUnits[su];
333 SDNode *MainNode = SU->Node;
// Mark two-address instructions and flag their first operand's producer as
// a def&use operand (its value is both read and overwritten).
335 if (MainNode->isTargetOpcode()) {
336 unsigned Opc = MainNode->getTargetOpcode();
337 if (TII->isTwoAddrInstr(Opc)) {
338 SU->isTwoAddress = true;
339 SDNode *OpN = MainNode->getOperand(0).Val;
340 SUnit *OpSU = SUnitMap[OpN];
342 OpSU->isDefNUseOperand = true;
346 // Find all predecessors and successors of the group.
347 // Temporarily add N to make code simpler.
348 SU->FlaggedNodes.push_back(MainNode);
350 for (unsigned n = 0, e = SU->FlaggedNodes.size(); n != e; ++n) {
351 SDNode *N = SU->FlaggedNodes[n];
353 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
354 SDNode *OpN = N->getOperand(i).Val;
355 if (isPassiveNode(OpN)) continue; // Not scheduled.
356 SUnit *OpSU = SUnitMap[OpN];
357 assert(OpSU && "Node has no SUnit!");
358 if (OpSU == SU) continue; // In the same group.
360 MVT::ValueType OpVT = N->getOperand(i).getValueType();
361 assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
362 bool isChain = OpVT == MVT::Other;
// insert().second filters duplicate edges so counters are bumped once
// per distinct (unit, kind) edge.
364 if (SU->Preds.insert(std::make_pair(OpSU, isChain)).second) {
368 SU->NumChainPredsLeft++;
371 if (OpSU->Succs.insert(std::make_pair(SU, isChain)).second) {
373 OpSU->NumSuccsLeft++;
375 OpSU->NumChainSuccsLeft++;
381 // Remove MainNode from FlaggedNodes again.
382 SU->FlaggedNodes.pop_back();
385 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
386 SUnits[su].dumpAll(&DAG));
390 /// EmitSchedule - Emit the machine code in scheduled order.
391 void ScheduleDAGList::EmitSchedule() {
392 std::map<SDNode*, unsigned> VRBaseMap;
393 for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
394 if (SUnit *SU = Sequence[i]) {
// Emit flagged (glued) nodes first so the group stays contiguous, then
// the representative node itself.
395 for (unsigned j = 0, ee = SU->FlaggedNodes.size(); j != ee; j++)
396 EmitNode(SU->FlaggedNodes[j], VRBaseMap);
397 EmitNode(SU->Node, VRBaseMap);
399 // Null SUnit* is a noop.
405 /// dump - dump the schedule, printing each SUnit in sequence order and a
406 /// NOOP marker for every null entry.
406 void ScheduleDAGList::dumpSchedule() const {
407 for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
408 if (SUnit *SU = Sequence[i])
411 std::cerr << "**** NOOP ****\n";
415 /// Schedule - Schedule the DAG using list scheduling: build the SUnit graph,
416 /// initialize the priority queue, run the chosen direction's main loop, then
416 /// release queue state and emit the final sequence.
416 void ScheduleDAGList::Schedule() {
417 DEBUG(std::cerr << "********** List Scheduling **********\n");
419 // Build scheduling units.
422 AvailableQueue->initNodes(SUnits);
424 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
426 ListScheduleBottomUp();
428 ListScheduleTopDown();
430 AvailableQueue->releaseState();
432 DEBUG(std::cerr << "*** Final schedule ***\n");
433 DEBUG(dumpSchedule());
434 DEBUG(std::cerr << "\n");
436 // Emit in scheduled order
440 //===----------------------------------------------------------------------===//
441 // Bottom-Up Scheduling
442 //===----------------------------------------------------------------------===//
// getRegClass - Determine the register class of SU's result: from the target
// instruction descriptor for target nodes, otherwise from the source register
// of what is asserted to be a CopyFromReg.
444 static const TargetRegisterClass *getRegClass(SUnit *SU,
445 const TargetInstrInfo *TII,
446 const MRegisterInfo *MRI,
448 if (SU->Node->isTargetOpcode()) {
449 unsigned Opc = SU->Node->getTargetOpcode();
450 const TargetInstrDescriptor &II = TII->get(Opc);
451 return II.OpInfo->RegClass;
453 assert(SU->Node->getOpcode() == ISD::CopyFromReg);
454 unsigned SrcReg = cast<RegisterSDNode>(SU->Node->getOperand(1))->getReg();
455 if (MRegisterInfo::isVirtualRegister(SrcReg))
456 return RegMap->getRegClass(SrcReg);
// Physical register: scan all register classes for one that has the right
// value type and contains SrcReg.
458 for (MRegisterInfo::regclass_iterator I = MRI->regclass_begin(),
459 E = MRI->regclass_end(); I != E; ++I)
460 if ((*I)->hasType(SU->Node->getValueType(0)) &&
461 (*I)->contains(SrcReg))
463 assert(false && "Couldn't find register class for reg copy!");
// getNumResults - Count SU's register-carried results, i.e. result value
// types other than Other (chain) and Flag.
469 static unsigned getNumResults(SUnit *SU) {
470 unsigned NumResults = 0;
471 for (unsigned i = 0, e = SU->Node->getNumValues(); i != e; ++i) {
472 MVT::ValueType VT = SU->Node->getValueType(i);
473 if (VT != MVT::Other && VT != MVT::Flag)
479 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
480 /// the Available queue if the count reaches zero. Also update its cycle bound.
481 void ScheduleDAGList::ReleasePred(SUnit *PredSU, bool isChain,
483 // FIXME: the distance between two nodes is not always == the predecessor's
484 // latency. For example, the reader can very well read the register written
485 // by the predecessor later than the issue cycle. It also depends on the
486 // interrupt model (drain vs. freeze).
487 PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);
490 PredSU->NumSuccsLeft--;
492 PredSU->NumChainSuccsLeft--;
// Defensive check: a negative count means an edge was released twice.
495 if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
496 std::cerr << "*** List scheduling failed! ***\n";
498 std::cerr << " has been released too many times!\n";
503 if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
504 // EntryToken has to go last! Special case it here.
505 if (PredSU->Node->getOpcode() != ISD::EntryToken) {
506 PredSU->isAvailable = true;
507 AvailableQueue->push(PredSU);
// PredSU's value now has a live range opening; track it for pressure.
511 if (getNumResults(PredSU) > 0) {
512 const TargetRegisterClass *RegClass = getRegClass(PredSU, TII, MRI, RegMap);
513 OpenNodes[RegClass].insert(PredSU);
517 /// SharesOperandWithTwoAddr - Check if there is an unscheduled two-address
518 /// node with which SU shares an operand. If so, returns the node.
519 static SUnit *SharesOperandWithTwoAddr(SUnit *SU) {
520 assert(!SU->isTwoAddress && "Node cannot be two-address op");
// Walk SU's value (non-chain) predecessors; for each, look among its other
// value successors for an unscheduled two-address user.
521 for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
522 E = SU->Preds.end(); I != E; ++I) {
523 if (I->second) continue;
524 SUnit *PredSU = I->first;
525 for (std::set<std::pair<SUnit*, bool> >::iterator II =
526 PredSU->Succs.begin(), EE = PredSU->Succs.end(); II != EE; ++II) {
527 if (II->second) continue;
528 SUnit *SSU = II->first;
529 if (SSU->isTwoAddress && !SSU->isScheduled) {
// isFloater - A node that is not a CopyFromReg and has no unscheduled
// predecessors left; it can "float" to any slot without waiting on inputs.
537 static bool isFloater(const SUnit *SU) {
538 unsigned Opc = SU->Node->getOpcode();
539 return (Opc != ISD::CopyFromReg && SU->NumPredsLeft == 0);
// isSimpleFloaterUse - True if every value (non-chain) predecessor of SU is
// itself a floater.
542 static bool isSimpleFloaterUse(const SUnit *SU) {
544 for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
545 E = SU->Preds.end(); I != E; ++I) {
546 if (I->second) continue;
549 if (!isFloater(I->first))
555 /// ScheduleVertically - Schedule vertically. That is, follow up the D&U chain
556 /// (of two-address code) and schedule floaters aggressively.
557 void ScheduleDAGList::ScheduleVertically(SUnit *SU, unsigned& CurCycle) {
558 // Try scheduling Def&Use operand if register pressure is low.
559 const TargetRegisterClass *RegClass = getRegClass(SU, TII, MRI, RegMap);
560 unsigned Pressure = OpenNodes[RegClass].size();
561 unsigned Limit = RegPressureLimits[RegClass];
563 // See if we can schedule any predecessor that takes no registers.
564 for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
565 E = SU->Preds.end(); I != E; ++I) {
566 if (I->second) continue;
568 SUnit *PredSU = I->first;
569 if (!PredSU->isAvailable || PredSU->isScheduled)
572 if (isFloater(PredSU)) {
573 DEBUG(std::cerr<<"*** Scheduling floater\n");
574 AvailableQueue->RemoveFromPriorityQueue(PredSU);
// Vertical=false here prevents unbounded recursion through floaters.
575 ScheduleNodeBottomUp(PredSU, CurCycle, false);
580 if (SU->isTwoAddress && Pressure < Limit) {
581 DUSU = SUnitMap[SU->Node->getOperand(0).Val];
582 if (!DUSU->isAvailable || DUSU->isScheduled)
584 else if (!DUSU->isTwoAddress) {
// If another two-address node also uses DUSU's operand, schedule it
// first so the shared operand's live range is not lengthened.
585 SUnit *SSU = SharesOperandWithTwoAddr(DUSU);
586 if (SSU && SSU->isAvailable) {
587 AvailableQueue->RemoveFromPriorityQueue(SSU);
588 ScheduleNodeBottomUp(SSU, CurCycle, false);
589 Pressure = OpenNodes[RegClass].size();
590 if (Pressure >= Limit)
597 DEBUG(std::cerr<<"*** Low register pressure: scheduling D&U operand\n");
598 AvailableQueue->RemoveFromPriorityQueue(DUSU);
599 ScheduleNodeBottomUp(DUSU, CurCycle, false);
600 Pressure = OpenNodes[RegClass].size();
// Recurse up the def&use chain.
601 ScheduleVertically(DUSU, CurCycle);
605 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
606 /// count of its predecessors. If a predecessor pending count is zero, add it to
607 /// the Available queue.
608 void ScheduleDAGList::ScheduleNodeBottomUp(SUnit *SU, unsigned& CurCycle,
610 DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: ");
611 DEBUG(SU->dump(&DAG));
612 SU->Cycle = CurCycle;
614 AvailableQueue->ScheduledNode(SU);
615 Sequence.push_back(SU);
617 // Bottom up: release predecessors
618 for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Preds.begin(),
619 E = SU->Preds.end(); I != E; ++I)
620 ReleasePred(I->first, I->second, CurCycle);
621 SU->isScheduled = true;
// SU's results are consumed now; close its live range for pressure tracking.
624 if (getNumResults(SU) != 0) {
625 const TargetRegisterClass *RegClass = getRegClass(SU, TII, MRI, RegMap);
626 OpenNodes[RegClass].erase(SU);
// Optionally chase the def&use chain (guarded by -sched-vertically and the
// Vertical flag, which is false when called from ScheduleVertically itself).
628 if (SchedVertically && Vertical)
629 ScheduleVertically(SU, CurCycle);
633 /// isReady - True if node's lower cycle bound is less or equal to the current
634 /// scheduling cycle. Always true if all nodes have uniform latency 1.
635 static inline bool isReady(SUnit *SU, unsigned CurCycle) {
636 return SU->CycleBound <= CurCycle;
639 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
641 void ScheduleDAGList::ListScheduleBottomUp() {
642 // Determine rough register pressure limit.
643 for (MRegisterInfo::regclass_iterator RCI = MRI->regclass_begin(),
644 E = MRI->regclass_end(); RCI != E; ++RCI) {
645 const TargetRegisterClass *RC = *RCI;
// Leave two registers of headroom below the class size.
646 unsigned Limit = RC->getNumRegs();
647 Limit = (Limit > 2) ? Limit - 2 : 0;
648 std::map<const TargetRegisterClass*, unsigned>::iterator RPI =
649 RegPressureLimits.find(RC);
650 if (RPI == RegPressureLimits.end())
651 RegPressureLimits[RC] = Limit;
// Keep the most conservative (smallest) limit seen for this class.
653 unsigned &OldLimit = RegPressureLimits[RC];
654 if (Limit < OldLimit)
659 unsigned CurCycle = 0;
660 // Add root to Available queue.
661 AvailableQueue->push(SUnitMap[DAG.getRoot().Val]);
663 // While Available queue is not empty, grab the node with the highest
664 // priority. If it is not ready put it back. Schedule the node.
665 std::vector<SUnit*> NotReady;
666 SUnit *CurNode = NULL;
667 while (!AvailableQueue->empty()) {
// NOTE(review): this inner CurNode shadows the one declared above.
668 SUnit *CurNode = AvailableQueue->pop();
669 while (!isReady(CurNode, CurCycle)) {
670 NotReady.push_back(CurNode);
671 CurNode = AvailableQueue->pop();
674 // Add the nodes that aren't ready back onto the available list.
675 AvailableQueue->push_all(NotReady);
678 ScheduleNodeBottomUp(CurNode, CurCycle);
681 // Add entry node last
682 if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
683 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
684 Sequence.push_back(Entry);
687 // Reverse the order if it is bottom up.
688 std::reverse(Sequence.begin(), Sequence.end());
692 // Verify that all SUnits were scheduled.
693 bool AnyNotSched = false;
694 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
695 if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) {
697 std::cerr << "*** List scheduling failed! ***\n";
698 SUnits[i].dump(&DAG);
699 std::cerr << "has not been scheduled!\n";
703 assert(!AnyNotSched);
707 //===----------------------------------------------------------------------===//
708 // Top-Down Scheduling
709 //===----------------------------------------------------------------------===//
711 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
712 /// the PendingQueue if the count reaches zero.
713 void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) {
715 SuccSU->NumPredsLeft--;
717 SuccSU->NumChainPredsLeft--;
719 assert(SuccSU->NumPredsLeft >= 0 && SuccSU->NumChainPredsLeft >= 0 &&
720 "List scheduling internal error");
722 if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
723 // Compute how many cycles it will be before this actually becomes
724 // available. This is the max of the start time of all predecessors plus
726 unsigned AvailableCycle = 0;
727 for (std::set<std::pair<SUnit*, bool> >::iterator I = SuccSU->Preds.begin(),
728 E = SuccSU->Preds.end(); I != E; ++I) {
729 // If this is a token edge, we don't need to wait for the latency of the
730 // preceding instruction (e.g. a long-latency load) unless there is also
731 // some other data dependence.
732 unsigned PredDoneCycle = I->first->Cycle;
734 PredDoneCycle += I->first->Latency;
735 else if (I->first->Latency)
738 AvailableCycle = std::max(AvailableCycle, PredDoneCycle);
// Queue with its wake-up cycle; ListScheduleTopDown moves it to the
// Available queue when CurCycle reaches AvailableCycle.
741 PendingQueue.push_back(std::make_pair(AvailableCycle, SuccSU));
745 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
746 /// count of its successors. If a successor pending count is zero, add it to
747 /// the Available queue.
748 void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
749 DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: ");
750 DEBUG(SU->dump(&DAG));
752 Sequence.push_back(SU);
753 SU->Cycle = CurCycle;
755 // Top down: release successors.
756 for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Succs.begin(),
757 E = SU->Succs.end(); I != E; ++I)
758 ReleaseSucc(I->first, I->second);
761 /// ListScheduleTopDown - The main loop of list scheduling for top-down
763 void ScheduleDAGList::ListScheduleTopDown() {
764 unsigned CurCycle = 0;
765 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
767 // All leaves to Available queue.
768 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
769 // It is available if it has no predecessors.
770 if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
771 AvailableQueue->push(&SUnits[i]);
772 SUnits[i].isAvailable = SUnits[i].isPending = true;
776 // Emit the entry node first.
777 ScheduleNodeTopDown(Entry, CurCycle);
778 HazardRec->EmitInstruction(Entry->Node);
780 // While Available queue is not empty, grab the node with the highest
781 // priority. If it is not ready put it back. Schedule the node.
782 std::vector<SUnit*> NotReady;
783 while (!AvailableQueue->empty() || !PendingQueue.empty()) {
784 // Check to see if any of the pending instructions are ready to issue. If
785 // so, add them to the available queue.
786 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
787 if (PendingQueue[i].first == CurCycle) {
788 AvailableQueue->push(PendingQueue[i].second);
789 PendingQueue[i].second->isAvailable = true;
// Swap-and-pop removal; entry order in PendingQueue does not matter.
790 PendingQueue[i] = PendingQueue.back();
791 PendingQueue.pop_back();
794 assert(PendingQueue[i].first > CurCycle && "Negative latency?");
798 // If there are no instructions available, don't try to issue anything, and
799 // don't advance the hazard recognizer.
800 if (AvailableQueue->empty()) {
// Pop units in priority order until one has no structural hazard.
805 SUnit *FoundSUnit = 0;
806 SDNode *FoundNode = 0;
808 bool HasNoopHazards = false;
809 while (!AvailableQueue->empty()) {
810 SUnit *CurSUnit = AvailableQueue->pop();
812 // Get the node represented by this SUnit.
813 FoundNode = CurSUnit->Node;
815 // If this is a pseudo op, like copyfromreg, look to see if there is a
816 // real target node flagged to it. If so, use the target node.
817 for (unsigned i = 0, e = CurSUnit->FlaggedNodes.size();
818 FoundNode->getOpcode() < ISD::BUILTIN_OP_END && i != e; ++i)
819 FoundNode = CurSUnit->FlaggedNodes[i];
821 HazardRecognizer::HazardType HT = HazardRec->getHazardType(FoundNode);
822 if (HT == HazardRecognizer::NoHazard) {
823 FoundSUnit = CurSUnit;
827 // Remember if this is a noop hazard.
828 HasNoopHazards |= HT == HazardRecognizer::NoopHazard;
830 NotReady.push_back(CurSUnit);
833 // Add the nodes that aren't ready back onto the available list.
834 if (!NotReady.empty()) {
835 AvailableQueue->push_all(NotReady);
839 // If we found a node to schedule, do it now.
841 ScheduleNodeTopDown(FoundSUnit, CurCycle);
842 HazardRec->EmitInstruction(FoundNode);
843 FoundSUnit->isScheduled = true;
844 AvailableQueue->ScheduledNode(FoundSUnit);
846 // If this is a pseudo-op node, we don't want to increment the current
848 if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
850 } else if (!HasNoopHazards) {
851 // Otherwise, we have a pipeline stall, but no other problem, just advance
852 // the current cycle and try again.
853 DEBUG(std::cerr << "*** Advancing cycle, no work to do\n");
854 HazardRec->AdvanceCycle();
858 // Otherwise, we have no instructions to issue and we have instructions
859 // that will fault if we don't do this right. This is the case for
860 // processors without pipeline interlocks and other cases.
861 DEBUG(std::cerr << "*** Emitting noop\n");
862 HazardRec->EmitNoop();
863 Sequence.push_back(0); // NULL SUnit* -> noop
870 // Verify that all SUnits were scheduled.
871 bool AnyNotSched = false;
872 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
873 if (SUnits[i].NumPredsLeft != 0 || SUnits[i].NumChainPredsLeft != 0) {
875 std::cerr << "*** List scheduling failed! ***\n";
876 SUnits[i].dump(&DAG);
877 std::cerr << "has not been scheduled!\n";
881 assert(!AnyNotSched);
885 //===----------------------------------------------------------------------===//
886 // RegReductionPriorityQueue Implementation
887 //===----------------------------------------------------------------------===//
889 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
890 // to reduce register pressure.
893 class RegReductionPriorityQueue;
895 /// Sorting functions for the Available queue.
// Comparison functor for std::priority_queue; keeps a back-pointer to the
// owning queue so it can consult the per-node Sethi-Ullman numbers.
896 struct ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
897 RegReductionPriorityQueue *SPQ;
898 ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {}
899 ls_rr_sort(const ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
901 bool operator()(const SUnit* left, const SUnit* right) const;
903 } // end anonymous namespace
906 class RegReductionPriorityQueue : public SchedulingPriorityQueue {
907 // SUnits - The SUnits for the current graph.
908 const std::vector<SUnit> *SUnits;
910 // SethiUllmanNumbers - The SethiUllman number for each node.
911 std::vector<int> SethiUllmanNumbers;
913 std::priority_queue<SUnit*, std::vector<SUnit*>, ls_rr_sort> Queue;
915 RegReductionPriorityQueue() :
916 Queue(ls_rr_sort(this)) {}
918 void initNodes(const std::vector<SUnit> &sunits) {
920 // Calculate node priorities.
921 CalculatePriorities();
923 void releaseState() {
925 SethiUllmanNumbers.clear();
928 int getSethiUllmanNumber(unsigned NodeNum) const {
929 assert(NodeNum < SethiUllmanNumbers.size());
930 return SethiUllmanNumbers[NodeNum];
933 bool empty() const { return Queue.empty(); }
935 void push(SUnit *U) {
938 void push_all(const std::vector<SUnit *> &Nodes) {
939 for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
940 Queue.push(Nodes[i]);
944 SUnit *V = Queue.top();
949 /// RemoveFromPriorityQueue - This is a really inefficient way to remove a
950 /// node from a priority queue. We should roll our own heap to make this
951 /// better or something.
952 void RemoveFromPriorityQueue(SUnit *SU) {
// Pop entries into Temp until SU surfaces, drop SU, then push Temp back.
953 std::vector<SUnit*> Temp;
955 assert(!Queue.empty() && "Not in queue!");
956 while (Queue.top() != SU) {
957 Temp.push_back(Queue.top());
959 assert(!Queue.empty() && "Not in queue!");
962 // Remove the node from the PQ.
965 // Add all the other nodes back.
966 for (unsigned i = 0, e = Temp.size(); i != e; ++i)
971 void CalculatePriorities();
972 int CalcNodePriority(const SUnit *SU);
// operator() - Strict-weak ordering over SUnits. Returns true when 'left'
// has LOWER priority than 'right' (std::priority_queue pops 'right' first).
976 bool ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
977 unsigned LeftNum = left->NodeNum;
978 unsigned RightNum = right->NodeNum;
979 bool LIsTarget = left->Node->isTargetOpcode();
980 bool RIsTarget = right->Node->isTargetOpcode();
981 int LPriority = SPQ->getSethiUllmanNumber(LeftNum);
982 int RPriority = SPQ->getSethiUllmanNumber(RightNum);
// A Sethi-Ullman number of 0 or 1 marks a "floater" target node.
983 bool LIsFloater = LIsTarget && (LPriority == 1 || LPriority == 0);
984 bool RIsFloater = RIsTarget && (RPriority == 1 || RPriority == 0);
988 // Schedule floaters (e.g. load from some constant address) and those nodes
989 // with a single predecessor each first. They maintain / reduce register
996 // Special tie breaker: if two nodes share an operand, the one that uses it
997 // as a def&use operand is preferred.
998 if (LIsTarget && RIsTarget) {
999 if (left->isTwoAddress && !right->isTwoAddress) {
1000 SDNode *DUNode = left->Node->getOperand(0).Val;
1001 if (DUNode->isOperand(right->Node))
1004 if (!left->isTwoAddress && right->isTwoAddress) {
1005 SDNode *DUNode = right->Node->getOperand(0).Val;
1006 if (DUNode->isOperand(left->Node))
// Primary key: bonus-adjusted Sethi-Ullman number; ties broken by number
// of unscheduled predecessors, then by cycle bound.
1011 if (LPriority+LBonus < RPriority+RBonus)
1013 else if (LPriority+LBonus == RPriority+RBonus)
1014 if (left->NumPredsLeft > right->NumPredsLeft)
1016 else if (left->NumPredsLeft+LBonus == right->NumPredsLeft+RBonus)
1017 if (left->CycleBound > right->CycleBound)
1023 /// CalcNodePriority - Priority is the Sethi Ullman number.
1024 /// Smaller number is the higher priority.
1025 int RegReductionPriorityQueue::CalcNodePriority(const SUnit *SU) {
// Memoized: 0 is the "not yet computed" sentinel.
1026 int &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
1027 if (SethiUllmanNumber != 0)
1028 return SethiUllmanNumber;
1030 unsigned Opc = SU->Node->getOpcode();
1031 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1032 SethiUllmanNumber = INT_MAX - 10;
1033 else if (SU->NumSuccsLeft == 0)
1034 // If SU does not have a use, i.e. it doesn't produce a value that would
1035 // be consumed (e.g. store), then it terminates a chain of computation.
1036 // Give it a small SethiUllman number so it will be scheduled right before
1037 // its predecessors, so that it doesn't lengthen their live ranges.
1038 SethiUllmanNumber = INT_MIN + 10;
1039 else if (SU->NumPredsLeft == 0 && Opc != ISD::CopyFromReg)
1040 SethiUllmanNumber = 1;
// General case (elided else): recursively combine the value predecessors'
// Sethi-Ullman numbers, bumping Extra on ties per the classic algorithm.
1043 for (std::set<std::pair<SUnit*, bool> >::const_iterator
1044 I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) {
1045 if (I->second) continue; // ignore chain preds
1046 SUnit *PredSU = I->first;
1047 int PredSethiUllman = CalcNodePriority(PredSU);
1048 if (PredSethiUllman > SethiUllmanNumber) {
1049 SethiUllmanNumber = PredSethiUllman;
1051 } else if (PredSethiUllman == SethiUllmanNumber && !I->second)
1055 SethiUllmanNumber += Extra;
1058 return SethiUllmanNumber;
1061 /// CalculatePriorities - Calculate priorities of all scheduling units.
1062 void RegReductionPriorityQueue::CalculatePriorities() {
// Reset every cached Sethi-Ullman number to 0 ("unknown"), then force a
// computation for each unit. CalcNodePriority memoizes, so shared
// predecessor subtrees are only walked once.
1063 SethiUllmanNumbers.assign(SUnits->size(), 0);
1065 for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1066 CalcNodePriority(&(*SUnits)[i]);
1069 //===----------------------------------------------------------------------===//
1070 // LatencyPriorityQueue Implementation
1071 //===----------------------------------------------------------------------===//
1073 // This is a SchedulingPriorityQueue that schedules using latency information to
1074 // reduce the length of the critical path through the basic block.
// Forward declaration so the comparator below can hold a queue pointer.
1077 class LatencyPriorityQueue;
1079 /// Sorting functions for the Available queue.
// Comparison functor for the std::priority_queue inside
// LatencyPriorityQueue. The actual ordering lives in operator(), defined
// after the queue class so it can call back into it for latency data.
1080 struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> {
// Back-pointer to the owning queue; not owned by this functor.
1081 LatencyPriorityQueue *PQ;
1082 latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {}
1083 latency_sort(const latency_sort &RHS) : PQ(RHS.PQ) {}
1085 bool operator()(const SUnit* left, const SUnit* right) const;
1087 } // end anonymous namespace
// LatencyPriorityQueue - Available-node queue for the top-down list
// scheduler: prefers the node with the longest latency path to the exit
// (the critical path), breaking ties by how many nodes it solely blocks.
1090 class LatencyPriorityQueue : public SchedulingPriorityQueue {
1091 // SUnits - The SUnits for the current graph.
1092 const std::vector<SUnit> *SUnits;
1094 // Latencies - The latency (max of latency from this node to the bb exit)
// indexed by SUnit::NodeNum; -1 means "not yet computed" (see CalcLatency).
1096 std::vector<int> Latencies;
1098 /// NumNodesSolelyBlocking - This vector contains, for every node in the
1099 /// Queue, the number of nodes that the node is the sole unscheduled
1100 /// predecessor for. This is used as a tie-breaker heuristic for better
// mobility: unblocking more successors is preferred.
1102 std::vector<unsigned> NumNodesSolelyBlocking;
// The available-node heap itself; latency_sort calls back into this class.
1104 std::priority_queue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;
1106 LatencyPriorityQueue() : Queue(latency_sort(this)) {
// initNodes - Record the SUnit array and precompute all latencies.
1109 void initNodes(const std::vector<SUnit> &sunits) {
1111 // Calculate node priorities.
1112 CalculatePriorities();
1114 void releaseState() {
// getLatency - Critical-path latency for the node, computed by CalcLatency.
1119 unsigned getLatency(unsigned NodeNum) const {
1120 assert(NodeNum < Latencies.size());
1121 return Latencies[NodeNum];
// getNumSolelyBlockNodes - Tie-breaker count maintained by push_impl.
1124 unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
1125 assert(NodeNum < NumNodesSolelyBlocking.size());
1126 return NumNodesSolelyBlocking[NodeNum];
1129 bool empty() const { return Queue.empty(); }
1131 virtual void push(SUnit *U) {
1134 void push_impl(SUnit *U);
// push_all - Bulk insertion; each node goes through push_impl so its
// NumNodesSolelyBlocking count is refreshed.
1136 void push_all(const std::vector<SUnit *> &Nodes) {
1137 for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
1138 push_impl(Nodes[i]);
1142 SUnit *V = Queue.top();
1147 /// RemoveFromPriorityQueue - This is a really inefficient way to remove a
1148 /// node from a priority queue. We should roll our own heap to make this
1149 /// better or something.
// O(n log n): pops everything above SU into Temp, drops SU, reinserts Temp.
1150 void RemoveFromPriorityQueue(SUnit *SU) {
1151 std::vector<SUnit*> Temp;
1153 assert(!Queue.empty() && "Not in queue!");
1154 while (Queue.top() != SU) {
1155 Temp.push_back(Queue.top());
1157 assert(!Queue.empty() && "Not in queue!");
1160 // Remove the node from the PQ.
1163 // Add all the other nodes back.
1164 for (unsigned i = 0, e = Temp.size(); i != e; ++i)
1165 Queue.push(Temp[i]);
1168 // ScheduledNode - As nodes are scheduled, we look to see if there are any
1169 // successor nodes that have a single unscheduled predecessor. If so, that
1170 // single predecessor has a higher priority, since scheduling it will make
1171 // the node available.
1172 void ScheduledNode(SUnit *Node);
// Internal helpers; definitions follow the class.
1175 void CalculatePriorities();
1176 int CalcLatency(const SUnit &SU);
1177 void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
// Strict-weak "less" for the max-heap Queue: returning true means LHS has
// LOWER scheduling priority than RHS (priority_queue pops the largest).
1181 bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
1182 unsigned LHSNum = LHS->NodeNum;
1183 unsigned RHSNum = RHS->NodeNum;
1185 // The most important heuristic is scheduling the critical path.
1186 unsigned LHSLatency = PQ->getLatency(LHSNum);
1187 unsigned RHSLatency = PQ->getLatency(RHSNum);
1188 if (LHSLatency < RHSLatency) return true;
1189 if (LHSLatency > RHSLatency) return false;
1191 // After that, if two nodes have identical latencies, look to see if one will
1192 // unblock more other nodes than the other.
1193 unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
1194 unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
1195 if (LHSBlocked < RHSBlocked) return true;
1196 if (LHSBlocked > RHSBlocked) return false;
1198 // Finally, just to provide a stable ordering, use the node number as a
// deterministic tie-breaker so the ordering is a valid strict weak order.
1200 return LHSNum < RHSNum;
1204 /// CalcLatency - Calculate the maximal path from the node to the exit.
// (Comment name corrected: this defines CalcLatency, not CalcNodePriority.)
// Result is memoized in Latencies[SU.NodeNum]; the early-out for an
// already-computed entry sits on lines elided from this excerpt.
1206 int LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
1207 int &Latency = Latencies[SU.NodeNum];
// Longest path through any successor (chain and value edges alike), plus
// this node's own latency.
1211 int MaxSuccLatency = 0;
1212 for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU.Succs.begin(),
1213 E = SU.Succs.end(); I != E; ++I)
1214 MaxSuccLatency = std::max(MaxSuccLatency, CalcLatency(*I->first));
1216 return Latency = MaxSuccLatency + SU.Latency;
1219 /// CalculatePriorities - Calculate priorities of all scheduling units.
1220 void LatencyPriorityQueue::CalculatePriorities() {
// -1 marks "latency not yet computed" for CalcLatency's memoization;
// blocking counts start at zero and are maintained by push_impl.
1221 Latencies.assign(SUnits->size(), -1);
1222 NumNodesSolelyBlocking.assign(SUnits->size(), 0);
1224 for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1225 CalcLatency((*SUnits)[i]);
1228 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
1229 /// of SU, return it, otherwise return null.
1230 static SUnit *getSingleUnscheduledPred(SUnit *SU) {
1231 SUnit *OnlyAvailablePred = 0;
// Scan every predecessor, chain and value edges alike.
1232 for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Preds.begin(),
1233 E = SU->Preds.end(); I != E; ++I)
1234 if (!I->first->isScheduled) {
1235 // We found an available, but not scheduled, predecessor. If it's the
1236 // only one we have found, keep track of it... otherwise give up.
// (The give-up "return 0;" path is on a line elided from this excerpt.)
1237 if (OnlyAvailablePred && OnlyAvailablePred != I->first)
1239 OnlyAvailablePred = I->first;
1242 return OnlyAvailablePred;
// push_impl - Insert SU into the heap after refreshing its tie-breaker:
// how many successors SU is the sole unscheduled predecessor of.
1245 void LatencyPriorityQueue::push_impl(SUnit *SU) {
1246 // Look at all of the successors of this node. Count the number of nodes that
1247 // this node is the sole unscheduled node for.
1248 unsigned NumNodesBlocking = 0;
1249 for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(),
1250 E = SU->Succs.end(); I != E; ++I)
1251 if (getSingleUnscheduledPred(I->first) == SU)
// (The increment of NumNodesBlocking and the actual Queue.push(SU) are on
// lines elided from this excerpt.)
1253 NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
1259 // ScheduledNode - As nodes are scheduled, we look to see if there are any
1260 // successor nodes that have a single unscheduled predecessor. If so, that
1261 // single predecessor has a higher priority, since scheduling it will make
1262 // the node available.
1263 void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
// Re-examine each successor; any that now has exactly one unscheduled
// predecessor gets that predecessor's queue priority refreshed.
1264 for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(),
1265 E = SU->Succs.end(); I != E; ++I)
1266 AdjustPriorityOfUnscheduledPreds(I->first);
1269 /// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
1270 /// scheduled. If SU is not itself available, then there is at least one
1271 /// predecessor node that has not been scheduled yet. If SU has exactly ONE
1272 /// unscheduled predecessor, we want to increase its priority: it getting
1273 /// scheduled will make this node available, so it is better than some other
1274 /// node of the same priority that will not make a node available.
1275 void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
// NOTE(review): the guard tests isPending while its trailing comment claims
// "all preds scheduled" — confirm the flag matches that intent.
1276 if (SU->isPending) return; // All preds scheduled.
1278 SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
// Nothing to do unless exactly one unscheduled predecessor exists and it
// is already sitting in the available queue.
1279 if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
1281 // Okay, we found a single predecessor that is available, but not scheduled.
1282 // Since it is available, it must be in the priority queue. First remove it.
1283 RemoveFromPriorityQueue(OnlyAvailablePred);
1285 // Reinsert the node into the priority queue, which recomputes its
1286 // NumNodesSolelyBlocking value.
1287 push(OnlyAvailablePred);
1291 //===----------------------------------------------------------------------===//
1292 // Public Constructor Functions
1293 //===----------------------------------------------------------------------===//
/// createBURRListDAGScheduler - Construct a bottom-up (the 'true' argument)
/// register-reduction list scheduler, driven by a RegReductionPriorityQueue
/// and a default-constructed HazardRecognizer.
1295 llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAG &DAG,
1296 MachineBasicBlock *BB) {
1297 return new ScheduleDAGList(DAG, BB, DAG.getTarget(), true,
1298 new RegReductionPriorityQueue(),
1299 new HazardRecognizer());
1302 /// createTDListDAGScheduler - This creates a top-down list scheduler with the
1303 /// specified hazard recognizer.
1304 ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAG &DAG,
1305 MachineBasicBlock *BB,
1306 HazardRecognizer *HR) {
1307 return new ScheduleDAGList(DAG, BB, DAG.getTarget(), false,
1308 new LatencyPriorityQueue(),