1 //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
16 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "pre-RA-sched"
19 #include "ScheduleDAGSDNodes.h"
20 #include "llvm/InlineAsm.h"
21 #include "llvm/CodeGen/SchedulerRegistry.h"
22 #include "llvm/CodeGen/SelectionDAGISel.h"
23 #include "llvm/Target/TargetRegisterInfo.h"
24 #include "llvm/Target/TargetData.h"
25 #include "llvm/Target/TargetMachine.h"
26 #include "llvm/Target/TargetInstrInfo.h"
27 #include "llvm/Target/TargetLowering.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/raw_ostream.h"
38 static cl::opt<bool> RegPressureAware("reg-pressure-aware-sched",
39 cl::init(false), cl::Hidden);
41 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
42 STATISTIC(NumUnfolds, "Number of nodes unfolded");
43 STATISTIC(NumDups, "Number of duplicated nodes");
44 STATISTIC(NumPRCopies, "Number of physical register copies");
46 static RegisterScheduler
47 burrListDAGScheduler("list-burr",
48 "Bottom-up register reduction list scheduling",
49 createBURRListDAGScheduler);
50 static RegisterScheduler
51 tdrListrDAGScheduler("list-tdrr",
52 "Top-down register reduction list scheduling",
53 createTDRRListDAGScheduler);
54 static RegisterScheduler
55 sourceListDAGScheduler("source",
56 "Similar to list-burr but schedules in source "
57 "order when possible",
58 createSourceListDAGScheduler);
60 static RegisterScheduler
61 hybridListDAGScheduler("list-hybrid",
62 "Bottom-up rr list scheduling which avoid stalls for "
63 "long latency instructions",
64 createHybridListDAGScheduler);
67 //===----------------------------------------------------------------------===//
68 /// ScheduleDAGRRList - The actual register reduction list scheduler
69 /// implementation. This supports both top-down and bottom-up scheduling.
71 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
73 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
77 /// NeedLatency - True if the scheduler will make use of latency information.
81 /// AvailableQueue - The priority queue to use for the available SUnits.
82 SchedulingPriorityQueue *AvailableQueue;
84 /// LiveRegDefs - A set of physical registers and their definition
85 /// that are "live". These nodes must be scheduled before any other nodes that
86 /// modifies the registers can be scheduled.
88 std::vector<SUnit*> LiveRegDefs;
89 std::vector<unsigned> LiveRegCycles;
91 /// Topo - A topological ordering for SUnits which permits fast IsReachable
92 /// and similar queries.
93 ScheduleDAGTopologicalSort Topo;
96 ScheduleDAGRRList(MachineFunction &mf,
97 bool isbottomup, bool needlatency,
98 SchedulingPriorityQueue *availqueue)
99 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
100 AvailableQueue(availqueue), Topo(SUnits) {
103 ~ScheduleDAGRRList() {
104 delete AvailableQueue;
109 /// IsReachable - Checks if SU is reachable from TargetSU.
110 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
111 return Topo.IsReachable(SU, TargetSU);
114 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
116 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
117 return Topo.WillCreateCycle(SU, TargetSU);
120 /// AddPred - adds a predecessor edge to SUnit SU.
121 /// This returns true if this is a new predecessor.
122 /// Updates the topological ordering if required.
123 void AddPred(SUnit *SU, const SDep &D) {
124 Topo.AddPred(SU, D.getSUnit());
128 /// RemovePred - removes a predecessor edge from SUnit SU.
129 /// This returns true if an edge was removed.
130 /// Updates the topological ordering if required.
131 void RemovePred(SUnit *SU, const SDep &D) {
132 Topo.RemovePred(SU, D.getSUnit());
137 void ReleasePred(SUnit *SU, const SDep *PredEdge);
138 void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
139 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
140 void ReleaseSuccessors(SUnit *SU);
141 void CapturePred(SDep *PredEdge);
142 void ScheduleNodeBottomUp(SUnit*, unsigned);
143 void ScheduleNodeTopDown(SUnit*, unsigned);
144 void UnscheduleNodeBottomUp(SUnit*);
145 void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
146 SUnit *CopyAndMoveSuccessors(SUnit*);
147 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
148 const TargetRegisterClass*,
149 const TargetRegisterClass*,
150 SmallVector<SUnit*, 2>&);
151 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
152 void ListScheduleTopDown();
153 void ListScheduleBottomUp();
156 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
157 /// Updates the topological ordering if required.
158 SUnit *CreateNewSUnit(SDNode *N) {
159 unsigned NumSUnits = SUnits.size();
160 SUnit *NewNode = NewSUnit(N);
161 // Update the topological ordering.
162 if (NewNode->NodeNum >= NumSUnits)
163 Topo.InitDAGTopologicalSorting();
167 /// CreateClone - Creates a new SUnit from an existing one.
168 /// Updates the topological ordering if required.
169 SUnit *CreateClone(SUnit *N) {
170 unsigned NumSUnits = SUnits.size();
171 SUnit *NewNode = Clone(N);
172 // Update the topological ordering.
173 if (NewNode->NodeNum >= NumSUnits)
174 Topo.InitDAGTopologicalSorting();
178 /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
179 /// need actual latency information but the hybrid scheduler does.
180 bool ForceUnitLatencies() const {
184 } // end anonymous namespace
187 /// Schedule - Schedule the DAG using list scheduling.
188 void ScheduleDAGRRList::Schedule() {
190 << "********** List Scheduling BB#" << BB->getNumber()
194 LiveRegDefs.resize(TRI->getNumRegs(), NULL);
195 LiveRegCycles.resize(TRI->getNumRegs(), 0);
197 // Build the scheduling graph.
198 BuildSchedGraph(NULL);
200 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
201 SUnits[su].dumpAll(this));
202 Topo.InitDAGTopologicalSorting();
204 AvailableQueue->initNodes(SUnits);
206 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
208 ListScheduleBottomUp();
210 ListScheduleTopDown();
212 AvailableQueue->releaseState();
215 //===----------------------------------------------------------------------===//
216 // Bottom-Up Scheduling
217 //===----------------------------------------------------------------------===//
219 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
220 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
221 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
222 SUnit *PredSU = PredEdge->getSUnit();
225 if (PredSU->NumSuccsLeft == 0) {
226 dbgs() << "*** Scheduling failed! ***\n";
228 dbgs() << " has been released too many times!\n";
232 --PredSU->NumSuccsLeft;
234 if (!ForceUnitLatencies()) {
235 // Updating predecessor's height. This is now the cycle when the
236 // predecessor can be scheduled without causing a pipeline stall.
237 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
240 // If all the node's successors are scheduled, this node is ready
241 // to be scheduled. Ignore the special EntrySU node.
242 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
243 PredSU->isAvailable = true;
244 AvailableQueue->push(PredSU);
248 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
249 // Bottom up: release predecessors
250 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
252 ReleasePred(SU, &*I);
253 if (I->isAssignedRegDep()) {
254 // This is a physical register dependency and it's impossible or
255 // expensive to copy the register. Make sure nothing that can
256 // clobber the register is scheduled between the predecessor and
258 if (!LiveRegDefs[I->getReg()]) {
260 LiveRegDefs[I->getReg()] = I->getSUnit();
261 LiveRegCycles[I->getReg()] = CurCycle;
267 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
268 /// count of its predecessors. If a predecessor pending count is zero, add it to
269 /// the Available queue.
270 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
271 DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
272 DEBUG(SU->dump(this));
275 if (CurCycle < SU->getHeight())
276 DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
279 // FIXME: Handle noop hazard.
280 SU->setHeightToAtLeast(CurCycle);
281 Sequence.push_back(SU);
283 AvailableQueue->ScheduledNode(SU);
285 ReleasePredecessors(SU, CurCycle);
287 // Release all the implicit physical register defs that are live.
288 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
290 if (I->isAssignedRegDep()) {
291 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
292 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
293 assert(LiveRegDefs[I->getReg()] == SU &&
294 "Physical register dependency violated?");
296 LiveRegDefs[I->getReg()] = NULL;
297 LiveRegCycles[I->getReg()] = 0;
302 SU->isScheduled = true;
305 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
306 /// unscheduled, incrcease the succ left count of its predecessors. Remove
307 /// them from AvailableQueue if necessary.
308 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
309 SUnit *PredSU = PredEdge->getSUnit();
310 if (PredSU->isAvailable) {
311 PredSU->isAvailable = false;
312 if (!PredSU->isPending)
313 AvailableQueue->remove(PredSU);
316 assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
317 ++PredSU->NumSuccsLeft;
320 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
321 /// its predecessor states to reflect the change.
322 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
323 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
324 DEBUG(SU->dump(this));
326 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
329 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
330 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
331 assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
332 "Physical register dependency violated?");
334 LiveRegDefs[I->getReg()] = NULL;
335 LiveRegCycles[I->getReg()] = 0;
339 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
341 if (I->isAssignedRegDep()) {
342 if (!LiveRegDefs[I->getReg()]) {
343 LiveRegDefs[I->getReg()] = SU;
346 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
347 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
351 SU->setHeightDirty();
352 SU->isScheduled = false;
353 SU->isAvailable = true;
354 AvailableQueue->push(SU);
355 AvailableQueue->UnscheduledNode(SU);
358 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
359 /// BTCycle in order to schedule a specific node.
360 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
361 unsigned &CurCycle) {
363 while (CurCycle > BtCycle) {
364 OldSU = Sequence.back();
366 if (SU->isSucc(OldSU))
367 // Don't try to remove SU from AvailableQueue.
368 SU->isAvailable = false;
369 UnscheduleNodeBottomUp(OldSU);
371 AvailableQueue->setCurCycle(CurCycle);
374 assert(!SU->isSucc(OldSU) && "Something is wrong!");
379 static bool isOperandOf(const SUnit *SU, SDNode *N) {
380 for (const SDNode *SUNode = SU->getNode(); SUNode;
381 SUNode = SUNode->getFlaggedNode()) {
382 if (SUNode->isOperandOf(N))
388 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
389 /// successors to the newly created node.
390 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
391 if (SU->getNode()->getFlaggedNode())
394 SDNode *N = SU->getNode();
399 bool TryUnfold = false;
400 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
401 EVT VT = N->getValueType(i);
404 else if (VT == MVT::Other)
407 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
408 const SDValue &Op = N->getOperand(i);
409 EVT VT = Op.getNode()->getValueType(Op.getResNo());
415 SmallVector<SDNode*, 2> NewNodes;
416 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
419 DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
420 assert(NewNodes.size() == 2 && "Expected a load folding node!");
423 SDNode *LoadNode = NewNodes[0];
424 unsigned NumVals = N->getNumValues();
425 unsigned OldNumVals = SU->getNode()->getNumValues();
426 for (unsigned i = 0; i != NumVals; ++i)
427 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
428 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
429 SDValue(LoadNode, 1));
431 // LoadNode may already exist. This can happen when there is another
432 // load from the same location and producing the same type of value
433 // but it has different alignment or volatileness.
434 bool isNewLoad = true;
436 if (LoadNode->getNodeId() != -1) {
437 LoadSU = &SUnits[LoadNode->getNodeId()];
440 LoadSU = CreateNewSUnit(LoadNode);
441 LoadNode->setNodeId(LoadSU->NodeNum);
442 ComputeLatency(LoadSU);
445 SUnit *NewSU = CreateNewSUnit(N);
446 assert(N->getNodeId() == -1 && "Node already inserted!");
447 N->setNodeId(NewSU->NodeNum);
449 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
450 for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
451 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
452 NewSU->isTwoAddress = true;
456 if (TID.isCommutable())
457 NewSU->isCommutable = true;
458 ComputeLatency(NewSU);
460 // Record all the edges to and from the old SU, by category.
461 SmallVector<SDep, 4> ChainPreds;
462 SmallVector<SDep, 4> ChainSuccs;
463 SmallVector<SDep, 4> LoadPreds;
464 SmallVector<SDep, 4> NodePreds;
465 SmallVector<SDep, 4> NodeSuccs;
466 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
469 ChainPreds.push_back(*I);
470 else if (isOperandOf(I->getSUnit(), LoadNode))
471 LoadPreds.push_back(*I);
473 NodePreds.push_back(*I);
475 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
478 ChainSuccs.push_back(*I);
480 NodeSuccs.push_back(*I);
483 // Now assign edges to the newly-created nodes.
484 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
485 const SDep &Pred = ChainPreds[i];
486 RemovePred(SU, Pred);
488 AddPred(LoadSU, Pred);
490 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
491 const SDep &Pred = LoadPreds[i];
492 RemovePred(SU, Pred);
494 AddPred(LoadSU, Pred);
496 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
497 const SDep &Pred = NodePreds[i];
498 RemovePred(SU, Pred);
499 AddPred(NewSU, Pred);
501 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
502 SDep D = NodeSuccs[i];
503 SUnit *SuccDep = D.getSUnit();
505 RemovePred(SuccDep, D);
509 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
510 SDep D = ChainSuccs[i];
511 SUnit *SuccDep = D.getSUnit();
513 RemovePred(SuccDep, D);
520 // Add a data dependency to reflect that NewSU reads the value defined
522 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
525 AvailableQueue->addNode(LoadSU);
526 AvailableQueue->addNode(NewSU);
530 if (NewSU->NumSuccsLeft == 0) {
531 NewSU->isAvailable = true;
537 DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
538 NewSU = CreateClone(SU);
540 // New SUnit has the exact same predecessors.
541 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
543 if (!I->isArtificial())
546 // Only copy scheduled successors. Cut them from old node's successor
547 // list and move them over.
548 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
549 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
551 if (I->isArtificial())
553 SUnit *SuccSU = I->getSUnit();
554 if (SuccSU->isScheduled) {
559 DelDeps.push_back(std::make_pair(SuccSU, D));
562 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
563 RemovePred(DelDeps[i].first, DelDeps[i].second);
565 AvailableQueue->updateNode(SU);
566 AvailableQueue->addNode(NewSU);
572 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
573 /// scheduled successors of the given SUnit to the last copy.
574 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
575 const TargetRegisterClass *DestRC,
576 const TargetRegisterClass *SrcRC,
577 SmallVector<SUnit*, 2> &Copies) {
578 SUnit *CopyFromSU = CreateNewSUnit(NULL);
579 CopyFromSU->CopySrcRC = SrcRC;
580 CopyFromSU->CopyDstRC = DestRC;
582 SUnit *CopyToSU = CreateNewSUnit(NULL);
583 CopyToSU->CopySrcRC = DestRC;
584 CopyToSU->CopyDstRC = SrcRC;
586 // Only copy scheduled successors. Cut them from old node's successor
587 // list and move them over.
588 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
589 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
591 if (I->isArtificial())
593 SUnit *SuccSU = I->getSUnit();
594 if (SuccSU->isScheduled) {
596 D.setSUnit(CopyToSU);
598 DelDeps.push_back(std::make_pair(SuccSU, *I));
601 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
602 RemovePred(DelDeps[i].first, DelDeps[i].second);
604 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
605 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
607 AvailableQueue->updateNode(SU);
608 AvailableQueue->addNode(CopyFromSU);
609 AvailableQueue->addNode(CopyToSU);
610 Copies.push_back(CopyFromSU);
611 Copies.push_back(CopyToSU);
616 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
617 /// definition of the specified node.
618 /// FIXME: Move to SelectionDAG?
619 static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
620 const TargetInstrInfo *TII) {
621 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
622 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
623 unsigned NumRes = TID.getNumDefs();
624 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
629 return N->getValueType(NumRes);
632 /// CheckForLiveRegDef - Return true and update live register vector if the
633 /// specified register def of the specified SUnit clobbers any "live" registers.
634 static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
635 std::vector<SUnit*> &LiveRegDefs,
636 SmallSet<unsigned, 4> &RegAdded,
637 SmallVector<unsigned, 4> &LRegs,
638 const TargetRegisterInfo *TRI) {
640 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
641 if (RegAdded.insert(Reg)) {
642 LRegs.push_back(Reg);
646 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
647 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
648 if (RegAdded.insert(*Alias)) {
649 LRegs.push_back(*Alias);
656 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
657 /// scheduling of the given node to satisfy live physical register dependencies.
658 /// If the specific node is the last one that's available to schedule, do
659 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
660 bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
661 SmallVector<unsigned, 4> &LRegs){
662 if (NumLiveRegs == 0)
665 SmallSet<unsigned, 4> RegAdded;
666 // If this node would clobber any "live" register, then it's not ready.
667 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
669 if (I->isAssignedRegDep())
670 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
671 RegAdded, LRegs, TRI);
674 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
675 if (Node->getOpcode() == ISD::INLINEASM) {
676 // Inline asm can clobber physical defs.
677 unsigned NumOps = Node->getNumOperands();
678 if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
679 --NumOps; // Ignore the flag operand.
681 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
683 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
684 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
686 ++i; // Skip the ID value.
687 if (InlineAsm::isRegDefKind(Flags) ||
688 InlineAsm::isRegDefEarlyClobberKind(Flags)) {
689 // Check for def of register or earlyclobber register.
690 for (; NumVals; --NumVals, ++i) {
691 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
692 if (TargetRegisterInfo::isPhysicalRegister(Reg))
693 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
701 if (!Node->isMachineOpcode())
703 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
704 if (!TID.ImplicitDefs)
706 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
707 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
709 return !LRegs.empty();
713 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
715 void ScheduleDAGRRList::ListScheduleBottomUp() {
716 unsigned CurCycle = 0;
718 // Release any predecessors of the special Exit node.
719 ReleasePredecessors(&ExitSU, CurCycle);
721 // Add root to Available queue.
722 if (!SUnits.empty()) {
723 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
724 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
725 RootSU->isAvailable = true;
726 AvailableQueue->push(RootSU);
729 // While Available queue is not empty, grab the node with the highest
730 // priority. If it is not ready put it back. Schedule the node.
731 SmallVector<SUnit*, 4> NotReady;
732 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
733 Sequence.reserve(SUnits.size());
734 while (!AvailableQueue->empty()) {
735 bool Delayed = false;
737 SUnit *CurSU = AvailableQueue->pop();
739 SmallVector<unsigned, 4> LRegs;
740 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
743 LRegsMap.insert(std::make_pair(CurSU, LRegs));
745 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
746 NotReady.push_back(CurSU);
747 CurSU = AvailableQueue->pop();
750 // All candidates are delayed due to live physical reg dependencies.
751 // Try backtracking, code duplication, or inserting cross class copies
753 if (Delayed && !CurSU) {
754 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
755 SUnit *TrySU = NotReady[i];
756 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
758 // Try unscheduling up to the point where it's safe to schedule
760 unsigned LiveCycle = CurCycle;
761 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
762 unsigned Reg = LRegs[j];
763 unsigned LCycle = LiveRegCycles[Reg];
764 LiveCycle = std::min(LiveCycle, LCycle);
766 SUnit *OldSU = Sequence[LiveCycle];
767 if (!WillCreateCycle(TrySU, OldSU)) {
768 BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
769 // Force the current node to be scheduled before the node that
770 // requires the physical reg dep.
771 if (OldSU->isAvailable) {
772 OldSU->isAvailable = false;
773 AvailableQueue->remove(OldSU);
775 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
776 /*Reg=*/0, /*isNormalMemory=*/false,
777 /*isMustAlias=*/false, /*isArtificial=*/true));
778 // If one or more successors has been unscheduled, then the current
779 // node is no longer avaialable. Schedule a successor that's now
780 // available instead.
781 if (!TrySU->isAvailable)
782 CurSU = AvailableQueue->pop();
785 TrySU->isPending = false;
786 NotReady.erase(NotReady.begin()+i);
793 // Can't backtrack. If it's too expensive to copy the value, then try
794 // duplicate the nodes that produces these "too expensive to copy"
795 // values to break the dependency. In case even that doesn't work,
796 // insert cross class copies.
797 // If it's not too expensive, i.e. cost != -1, issue copies.
798 SUnit *TrySU = NotReady[0];
799 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
800 assert(LRegs.size() == 1 && "Can't handle this yet!");
801 unsigned Reg = LRegs[0];
802 SUnit *LRDef = LiveRegDefs[Reg];
803 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
804 const TargetRegisterClass *RC =
805 TRI->getMinimalPhysRegClass(Reg, VT);
806 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
808 // If cross copy register class is null, then it must be possible copy
809 // the value directly. Do not try duplicate the def.
812 NewDef = CopyAndMoveSuccessors(LRDef);
816 // Issue copies, these can be expensive cross register class copies.
817 SmallVector<SUnit*, 2> Copies;
818 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
819 DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
820 << " to SU #" << Copies.front()->NodeNum << "\n");
821 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
822 /*Reg=*/0, /*isNormalMemory=*/false,
823 /*isMustAlias=*/false,
824 /*isArtificial=*/true));
825 NewDef = Copies.back();
828 DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
829 << " to SU #" << TrySU->NodeNum << "\n");
830 LiveRegDefs[Reg] = NewDef;
831 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
832 /*Reg=*/0, /*isNormalMemory=*/false,
833 /*isMustAlias=*/false,
834 /*isArtificial=*/true));
835 TrySU->isAvailable = false;
839 assert(CurSU && "Unable to resolve live physical register dependencies!");
842 // Add the nodes that aren't ready back onto the available list.
843 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
844 NotReady[i]->isPending = false;
845 // May no longer be available due to backtracking.
846 if (NotReady[i]->isAvailable)
847 AvailableQueue->push(NotReady[i]);
852 ScheduleNodeBottomUp(CurSU, CurCycle);
854 AvailableQueue->setCurCycle(CurCycle);
857 // Reverse the order if it is bottom up.
858 std::reverse(Sequence.begin(), Sequence.end());
861 VerifySchedule(isBottomUp);
865 //===----------------------------------------------------------------------===//
866 // Top-Down Scheduling
867 //===----------------------------------------------------------------------===//
869 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
870 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
871 void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
872 SUnit *SuccSU = SuccEdge->getSUnit();
875 if (SuccSU->NumPredsLeft == 0) {
876 dbgs() << "*** Scheduling failed! ***\n";
878 dbgs() << " has been released too many times!\n";
882 --SuccSU->NumPredsLeft;
884 // If all the node's predecessors are scheduled, this node is ready
885 // to be scheduled. Ignore the special ExitSU node.
886 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
887 SuccSU->isAvailable = true;
888 AvailableQueue->push(SuccSU);
892 void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
893 // Top down: release successors
894 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
896 assert(!I->isAssignedRegDep() &&
897 "The list-tdrr scheduler doesn't yet support physreg dependencies!");
899 ReleaseSucc(SU, &*I);
903 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
904 /// count of its successors. If a successor pending count is zero, add it to
905 /// the Available queue.
906 void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
907 DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
908 DEBUG(SU->dump(this));
910 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
911 SU->setDepthToAtLeast(CurCycle);
912 Sequence.push_back(SU);
914 ReleaseSuccessors(SU);
915 SU->isScheduled = true;
916 AvailableQueue->ScheduledNode(SU);
919 /// ListScheduleTopDown - The main loop of list scheduling for top-down
921 void ScheduleDAGRRList::ListScheduleTopDown() {
922 unsigned CurCycle = 0;
923 AvailableQueue->setCurCycle(CurCycle);
925 // Release any successors of the special Entry node.
926 ReleaseSuccessors(&EntrySU);
928 // All leaves to Available queue.
929 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
930 // It is available if it has no predecessors.
931 if (SUnits[i].Preds.empty()) {
932 AvailableQueue->push(&SUnits[i]);
933 SUnits[i].isAvailable = true;
937 // While Available queue is not empty, grab the node with the highest
938 // priority. If it is not ready put it back. Schedule the node.
939 Sequence.reserve(SUnits.size());
940 while (!AvailableQueue->empty()) {
941 SUnit *CurSU = AvailableQueue->pop();
944 ScheduleNodeTopDown(CurSU, CurCycle);
946 AvailableQueue->setCurCycle(CurCycle);
950 VerifySchedule(isBottomUp);
955 //===----------------------------------------------------------------------===//
956 // RegReductionPriorityQueue Implementation
957 //===----------------------------------------------------------------------===//
959 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
960 // to reduce register pressure.
964 class RegReductionPriorityQueue;
966 /// Sorting functions for the Available queue.
967 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
968 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
969 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
970 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
972 bool operator()(const SUnit* left, const SUnit* right) const;
975 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
976 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
977 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
978 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
980 bool operator()(const SUnit* left, const SUnit* right) const;
983 struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
984 RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
985 src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
987 src_ls_rr_sort(const src_ls_rr_sort &RHS)
990 bool operator()(const SUnit* left, const SUnit* right) const;
993 struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
994 RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
995 hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
997 hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1000 bool operator()(const SUnit* left, const SUnit* right) const;
1002 } // end anonymous namespace
1004 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1005 /// Smaller number is the higher priority.
1007 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1008 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1009 if (SethiUllmanNumber != 0)
1010 return SethiUllmanNumber;
1013 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1015 if (I->isCtrl()) continue; // ignore chain preds
1016 SUnit *PredSU = I->getSUnit();
1017 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1018 if (PredSethiUllman > SethiUllmanNumber) {
1019 SethiUllmanNumber = PredSethiUllman;
1021 } else if (PredSethiUllman == SethiUllmanNumber)
1025 SethiUllmanNumber += Extra;
1027 if (SethiUllmanNumber == 0)
1028 SethiUllmanNumber = 1;
1030 return SethiUllmanNumber;
1035 class RegReductionPriorityQueue : public SchedulingPriorityQueue {
1036 std::vector<SUnit*> Queue;
1038 unsigned CurQueueId;
1042 // SUnits - The SUnits for the current graph.
1043 std::vector<SUnit> *SUnits;
1045 MachineFunction &MF;
1046 const TargetInstrInfo *TII;
1047 const TargetRegisterInfo *TRI;
1048 const TargetLowering *TLI;
1049 ScheduleDAGRRList *scheduleDAG;
1051 // SethiUllmanNumbers - The SethiUllman number for each node.
1052 std::vector<unsigned> SethiUllmanNumbers;
1054 /// RegPressure - Tracking current reg pressure per register class.
1056 std::vector<unsigned> RegPressure;
1058 /// RegLimit - Tracking the number of allocatable registers per register
1060 std::vector<unsigned> RegLimit;
1063 RegReductionPriorityQueue(MachineFunction &mf,
1065 const TargetInstrInfo *tii,
1066 const TargetRegisterInfo *tri,
1067 const TargetLowering *tli)
1068 : Picker(this), CurQueueId(0), isBottomUp(isbottomup),
1069 MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1070 unsigned NumRC = TRI->getNumRegClasses();
1071 RegLimit.resize(NumRC);
1072 RegPressure.resize(NumRC);
1073 std::fill(RegLimit.begin(), RegLimit.end(), 0);
1074 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1075 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1076 E = TRI->regclass_end(); I != E; ++I)
1077 RegLimit[(*I)->getID()] = tri->getAllocatableSet(MF, *I).count() - 1;
1080 void initNodes(std::vector<SUnit> &sunits) {
1082 // Add pseudo dependency edges for two-address nodes.
1083 AddPseudoTwoAddrDeps();
1084 // Reroute edges to nodes with multiple uses.
1085 PrescheduleNodesWithMultipleUses();
1086 // Calculate node priorities.
1087 CalculateSethiUllmanNumbers();
1090 void addNode(const SUnit *SU) {
1091 unsigned SUSize = SethiUllmanNumbers.size();
1092 if (SUnits->size() > SUSize)
1093 SethiUllmanNumbers.resize(SUSize*2, 0);
1094 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1097 void updateNode(const SUnit *SU) {
1098 SethiUllmanNumbers[SU->NodeNum] = 0;
1099 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1102 void releaseState() {
1104 SethiUllmanNumbers.clear();
1105 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1108 unsigned getNodePriority(const SUnit *SU) const {
1109 assert(SU->NodeNum < SethiUllmanNumbers.size());
1110 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1111 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1112 // CopyToReg should be close to its uses to facilitate coalescing and
1115 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1116 Opc == TargetOpcode::SUBREG_TO_REG ||
1117 Opc == TargetOpcode::INSERT_SUBREG)
1118 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1119 // close to their uses to facilitate coalescing.
1121 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1122 // If SU does not have a register use, i.e. it doesn't produce a value
1123 // that would be consumed (e.g. store), then it terminates a chain of
1124 // computation. Give it a large SethiUllman number so it will be
1125 // scheduled right before its predecessors that it doesn't lengthen
1126 // their live ranges.
1128 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1129 // If SU does not have a register def, schedule it close to its uses
1130 // because it does not lengthen any live ranges.
1132 return SethiUllmanNumbers[SU->NodeNum];
1135 unsigned getNodeOrdering(const SUnit *SU) const {
1136 return scheduleDAG->DAG->GetOrdering(SU->getNode());
1139 bool empty() const { return Queue.empty(); }
1141 void push(SUnit *U) {
1142 assert(!U->NodeQueueId && "Node in the queue already");
1143 U->NodeQueueId = ++CurQueueId;
1148 if (empty()) return NULL;
1149 std::vector<SUnit *>::iterator Best = Queue.begin();
1150 for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
1151 E = Queue.end(); I != E; ++I)
1152 if (Picker(*Best, *I))
1155 if (Best != prior(Queue.end()))
1156 std::swap(*Best, Queue.back());
1162 void remove(SUnit *SU) {
1163 assert(!Queue.empty() && "Queue is empty!");
1164 assert(SU->NodeQueueId != 0 && "Not in queue!");
1165 std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1167 if (I != prior(Queue.end()))
1168 std::swap(*I, Queue.back());
1170 SU->NodeQueueId = 0;
1173 bool HighRegPressure(const SUnit *SU) const {
1177 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1181 SUnit *PredSU = I->getSUnit();
1182 const SDNode *PN = PredSU->getNode();
1183 if (!PN->isMachineOpcode()) {
1184 if (PN->getOpcode() == ISD::CopyToReg) {
1185 EVT VT = PN->getOperand(1).getValueType();
1186 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1187 unsigned Cost = TLI->getRepRegClassCostFor(VT);
1188 if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
1193 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1194 for (unsigned i = 0; i != NumDefs; ++i) {
1195 EVT VT = PN->getValueType(i);
1196 if (!PN->hasAnyUseOfValue(i))
1198 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1199 unsigned Cost = TLI->getRepRegClassCostFor(VT);
1200 // Check if this increases register pressure of the specific register
1201 // class to the point where it would cause spills.
1202 if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
1210 void OpenPredLives(SUnit *SU) {
1211 const SDNode *N = SU->getNode();
1212 if (!N->isMachineOpcode())
1214 unsigned Opc = N->getMachineOpcode();
1215 if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
1216 Opc == TargetOpcode::REG_SEQUENCE ||
1217 Opc == TargetOpcode::IMPLICIT_DEF)
1220 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1224 SUnit *PredSU = I->getSUnit();
1225 if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1227 const SDNode *PN = PredSU->getNode();
1228 if (!PN->isMachineOpcode()) {
1229 if (PN->getOpcode() == ISD::CopyToReg) {
1230 EVT VT = PN->getOperand(1).getValueType();
1231 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1232 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1236 unsigned POpc = PN->getMachineOpcode();
1237 if (POpc == TargetOpcode::IMPLICIT_DEF)
1239 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1240 for (unsigned i = 0; i != NumDefs; ++i) {
1241 EVT VT = PN->getValueType(i);
1242 if (!PN->hasAnyUseOfValue(i))
1244 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1245 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1251 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1252 for (unsigned i = 0; i != NumDefs; ++i) {
1253 EVT VT = N->getValueType(i);
1254 if (!N->hasAnyUseOfValue(i))
1256 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1257 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1258 // Register pressure tracking is imprecise. This can happen.
1259 RegPressure[RCId] = 0;
1261 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1265 void ClosePredLives(SUnit *SU) {
1266 const SDNode *N = SU->getNode();
1267 if (!N->isMachineOpcode())
1269 unsigned Opc = N->getMachineOpcode();
1270 if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
1271 Opc == TargetOpcode::REG_SEQUENCE ||
1272 Opc == TargetOpcode::IMPLICIT_DEF)
1275 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1279 SUnit *PredSU = I->getSUnit();
1280 if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1282 const SDNode *PN = PredSU->getNode();
1283 if (!PN->isMachineOpcode()) {
1284 if (PN->getOpcode() == ISD::CopyToReg) {
1285 EVT VT = PN->getOperand(1).getValueType();
1286 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1287 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1291 unsigned POpc = PN->getMachineOpcode();
1292 if (POpc == TargetOpcode::IMPLICIT_DEF)
1294 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1295 for (unsigned i = 0; i != NumDefs; ++i) {
1296 EVT VT = PN->getValueType(i);
1297 if (!PN->hasAnyUseOfValue(i))
1299 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1300 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1301 // Register pressure tracking is imprecise. This can happen.
1302 RegPressure[RCId] = 0;
1304 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1310 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1311 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1312 EVT VT = N->getValueType(i);
1313 if (VT == MVT::Flag || VT == MVT::Other)
1315 if (!N->hasAnyUseOfValue(i))
1317 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1318 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1322 void ScheduledNode(SUnit *SU) {
1323 if (!TLI || !isBottomUp)
1329 void UnscheduledNode(SUnit *SU) {
1330 if (!TLI || !isBottomUp)
1336 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1337 scheduleDAG = scheduleDag;
1340 void dumpRegPressure() const {
1341 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1342 E = TRI->regclass_end(); I != E; ++I) {
1343 const TargetRegisterClass *RC = *I;
1344 unsigned Id = RC->getID();
1345 unsigned RP = RegPressure[Id];
1347 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1353 bool canClobber(const SUnit *SU, const SUnit *Op);
1354 void AddPseudoTwoAddrDeps();
1355 void PrescheduleNodesWithMultipleUses();
1356 void CalculateSethiUllmanNumbers();
1359 typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1360 BURegReductionPriorityQueue;
1362 typedef RegReductionPriorityQueue<td_ls_rr_sort>
1363 TDRegReductionPriorityQueue;
1365 typedef RegReductionPriorityQueue<src_ls_rr_sort>
1366 SrcRegReductionPriorityQueue;
1368 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1369 HybridBURRPriorityQueue;
1372 /// closestSucc - Returns the scheduled cycle of the successor which is
1373 /// closest to the current cycle.
1374 static unsigned closestSucc(const SUnit *SU) {
1375 unsigned MaxHeight = 0;
1376 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1378 if (I->isCtrl()) continue; // ignore chain succs
1379 unsigned Height = I->getSUnit()->getHeight();
1380 // If there are bunch of CopyToRegs stacked up, they should be considered
1381 // to be at the same position.
1382 if (I->getSUnit()->getNode() &&
1383 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1384 Height = closestSucc(I->getSUnit())+1;
1385 if (Height > MaxHeight)
1391 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
1392 /// for scratch registers, i.e. number of data dependencies.
1393 static unsigned calcMaxScratches(const SUnit *SU) {
1394 unsigned Scratches = 0;
1395 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1397 if (I->isCtrl()) continue; // ignore chain preds
1403 template <typename RRSort>
1404 static bool BURRSort(const SUnit *left, const SUnit *right,
1405 const RegReductionPriorityQueue<RRSort> *SPQ) {
1406 unsigned LPriority = SPQ->getNodePriority(left);
1407 unsigned RPriority = SPQ->getNodePriority(right);
1408 if (LPriority != RPriority)
1409 return LPriority > RPriority;
1411 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
1416 // and the following instructions are both ready.
1420 // Then schedule t2 = op first.
1427 // This creates more short live intervals.
1428 unsigned LDist = closestSucc(left);
1429 unsigned RDist = closestSucc(right);
1431 return LDist < RDist;
1433 // How many registers becomes live when the node is scheduled.
1434 unsigned LScratch = calcMaxScratches(left);
1435 unsigned RScratch = calcMaxScratches(right);
1436 if (LScratch != RScratch)
1437 return LScratch > RScratch;
1439 if (left->getHeight() != right->getHeight())
1440 return left->getHeight() > right->getHeight();
1442 if (left->getDepth() != right->getDepth())
1443 return left->getDepth() < right->getDepth();
1445 assert(left->NodeQueueId && right->NodeQueueId &&
1446 "NodeQueueId cannot be zero");
1447 return (left->NodeQueueId > right->NodeQueueId);
1451 bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1452 return BURRSort(left, right, SPQ);
1455 // Source order, otherwise bottom up.
1456 bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1457 unsigned LOrder = SPQ->getNodeOrdering(left);
1458 unsigned ROrder = SPQ->getNodeOrdering(right);
1460 // Prefer an ordering where the lower the non-zero order number, the higher
1462 if ((LOrder || ROrder) && LOrder != ROrder)
1463 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
1465 return BURRSort(left, right, SPQ);
1468 bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
1469 bool LHigh = SPQ->HighRegPressure(left);
1470 bool RHigh = SPQ->HighRegPressure(right);
1471 if (LHigh && !RHigh)
1473 else if (!LHigh && RHigh)
1475 else if (!LHigh && !RHigh) {
1476 // Low register pressure situation, schedule for latency if possible.
1477 bool LStall = left->SchedulingPref == Sched::Latency &&
1478 SPQ->getCurCycle() < left->getHeight();
1479 bool RStall = right->SchedulingPref == Sched::Latency &&
1480 SPQ->getCurCycle() < right->getHeight();
1481 // If scheduling one of the node will cause a pipeline stall, delay it.
1482 // If scheduling either one of the node will cause a pipeline stall, sort
1483 // them according to their height.
1484 // If neither will cause a pipeline stall, try to reduce register pressure.
1488 if (left->getHeight() != right->getHeight())
1489 return left->getHeight() > right->getHeight();
1493 // If either node is scheduling for latency, sort them by height and latency
1495 if (left->SchedulingPref == Sched::Latency ||
1496 right->SchedulingPref == Sched::Latency) {
1497 if (left->getHeight() != right->getHeight())
1498 return left->getHeight() > right->getHeight();
1499 if (left->Latency != right->Latency)
1500 return left->Latency > right->Latency;
1504 return BURRSort(left, right, SPQ);
1509 RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1510 if (SU->isTwoAddress) {
1511 unsigned Opc = SU->getNode()->getMachineOpcode();
1512 const TargetInstrDesc &TID = TII->get(Opc);
1513 unsigned NumRes = TID.getNumDefs();
1514 unsigned NumOps = TID.getNumOperands() - NumRes;
1515 for (unsigned i = 0; i != NumOps; ++i) {
1516 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1517 SDNode *DU = SU->getNode()->getOperand(i).getNode();
1518 if (DU->getNodeId() != -1 &&
1519 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1527 /// hasCopyToRegUse - Return true if SU has a value successor that is a
1529 static bool hasCopyToRegUse(const SUnit *SU) {
1530 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1532 if (I->isCtrl()) continue;
1533 const SUnit *SuccSU = I->getSUnit();
1534 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
1540 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1541 /// physical register defs.
1542 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1543 const TargetInstrInfo *TII,
1544 const TargetRegisterInfo *TRI) {
1545 SDNode *N = SuccSU->getNode();
1546 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1547 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1548 assert(ImpDefs && "Caller should check hasPhysRegDefs");
1549 for (const SDNode *SUNode = SU->getNode(); SUNode;
1550 SUNode = SUNode->getFlaggedNode()) {
1551 if (!SUNode->isMachineOpcode())
1553 const unsigned *SUImpDefs =
1554 TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
1557 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1558 EVT VT = N->getValueType(i);
1559 if (VT == MVT::Flag || VT == MVT::Other)
1561 if (!N->hasAnyUseOfValue(i))
1563 unsigned Reg = ImpDefs[i - NumDefs];
1564 for (;*SUImpDefs; ++SUImpDefs) {
1565 unsigned SUReg = *SUImpDefs;
1566 if (TRI->regsOverlap(Reg, SUReg))
1574 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1575 /// are not handled well by the general register pressure reduction
1576 /// heuristics. When presented with code like this:
1585 /// the heuristics tend to push the store up, but since the
1586 /// operand of the store has another use (U), this would increase
1587 /// the length of that other use (the U->N edge).
1589 /// This function transforms code like the above to route U's
1590 /// dependence through the store when possible, like this:
1601 /// This results in the store being scheduled immediately
1602 /// after N, which shortens the U->N live range, reducing
1603 /// register pressure.
1606 void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1607 // Visit all the nodes in topological order, working top-down.
1608 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1609 SUnit *SU = &(*SUnits)[i];
1610 // For now, only look at nodes with no data successors, such as stores.
1611 // These are especially important, due to the heuristics in
1612 // getNodePriority for nodes with no data successors.
1613 if (SU->NumSuccs != 0)
1615 // For now, only look at nodes with exactly one data predecessor.
1616 if (SU->NumPreds != 1)
1618 // Avoid prescheduling copies to virtual registers, which don't behave
1619 // like other nodes from the perspective of scheduling heuristics.
1620 if (SDNode *N = SU->getNode())
1621 if (N->getOpcode() == ISD::CopyToReg &&
1622 TargetRegisterInfo::isVirtualRegister
1623 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1626 // Locate the single data predecessor.
1628 for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1629 EE = SU->Preds.end(); II != EE; ++II)
1630 if (!II->isCtrl()) {
1631 PredSU = II->getSUnit();
1636 // Don't rewrite edges that carry physregs, because that requires additional
1637 // support infrastructure.
1638 if (PredSU->hasPhysRegDefs)
1640 // Short-circuit the case where SU is PredSU's only data successor.
1641 if (PredSU->NumSuccs == 1)
1643 // Avoid prescheduling to copies from virtual registers, which don't behave
1644 // like other nodes from the perspective of scheduling // heuristics.
1645 if (SDNode *N = SU->getNode())
1646 if (N->getOpcode() == ISD::CopyFromReg &&
1647 TargetRegisterInfo::isVirtualRegister
1648 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1651 // Perform checks on the successors of PredSU.
1652 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1653 EE = PredSU->Succs.end(); II != EE; ++II) {
1654 SUnit *PredSuccSU = II->getSUnit();
1655 if (PredSuccSU == SU) continue;
1656 // If PredSU has another successor with no data successors, for
1657 // now don't attempt to choose either over the other.
1658 if (PredSuccSU->NumSuccs == 0)
1659 goto outer_loop_continue;
1660 // Don't break physical register dependencies.
1661 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1662 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1663 goto outer_loop_continue;
1664 // Don't introduce graph cycles.
1665 if (scheduleDAG->IsReachable(SU, PredSuccSU))
1666 goto outer_loop_continue;
1669 // Ok, the transformation is safe and the heuristics suggest it is
1670 // profitable. Update the graph.
1671 DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
1672 << " next to PredSU #" << PredSU->NodeNum
1673 << " to guide scheduling in the presence of multiple uses\n");
1674 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1675 SDep Edge = PredSU->Succs[i];
1676 assert(!Edge.isAssignedRegDep());
1677 SUnit *SuccSU = Edge.getSUnit();
1679 Edge.setSUnit(PredSU);
1680 scheduleDAG->RemovePred(SuccSU, Edge);
1681 scheduleDAG->AddPred(SU, Edge);
1683 scheduleDAG->AddPred(SuccSU, Edge);
1687 outer_loop_continue:;
1691 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
1692 /// it as a def&use operand. Add a pseudo control edge from it to the other
1693 /// node (if it won't create a cycle) so the two-address one will be scheduled
1694 /// first (lower in the schedule). If both nodes are two-address, favor the
1695 /// one that has a CopyToReg use (more likely to be a loop induction update).
1696 /// If both are two-address, but one is commutable while the other is not
1697 /// commutable, favor the one that's not commutable.
1699 void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1700 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1701 SUnit *SU = &(*SUnits)[i];
1702 if (!SU->isTwoAddress)
1705 SDNode *Node = SU->getNode();
1706 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
1709 unsigned Opc = Node->getMachineOpcode();
1710 const TargetInstrDesc &TID = TII->get(Opc);
1711 unsigned NumRes = TID.getNumDefs();
1712 unsigned NumOps = TID.getNumOperands() - NumRes;
1713 for (unsigned j = 0; j != NumOps; ++j) {
1714 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1716 SDNode *DU = SU->getNode()->getOperand(j).getNode();
1717 if (DU->getNodeId() == -1)
1719 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1720 if (!DUSU) continue;
1721 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1722 E = DUSU->Succs.end(); I != E; ++I) {
1723 if (I->isCtrl()) continue;
1724 SUnit *SuccSU = I->getSUnit();
1727 // Be conservative. Ignore if nodes aren't at roughly the same
1728 // depth and height.
1729 if (SuccSU->getHeight() < SU->getHeight() &&
1730 (SU->getHeight() - SuccSU->getHeight()) > 1)
1732 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1733 // constrains whatever is using the copy, instead of the copy
1734 // itself. In the case that the copy is coalesced, this
1735 // preserves the intent of the pseudo two-address heurietics.
1736 while (SuccSU->Succs.size() == 1 &&
1737 SuccSU->getNode()->isMachineOpcode() &&
1738 SuccSU->getNode()->getMachineOpcode() ==
1739 TargetOpcode::COPY_TO_REGCLASS)
1740 SuccSU = SuccSU->Succs.front().getSUnit();
1741 // Don't constrain non-instruction nodes.
1742 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1744 // Don't constrain nodes with physical register defs if the
1745 // predecessor can clobber them.
1746 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1747 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1750 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1751 // these may be coalesced away. We want them close to their uses.
1752 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1753 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
1754 SuccOpc == TargetOpcode::INSERT_SUBREG ||
1755 SuccOpc == TargetOpcode::SUBREG_TO_REG)
1757 if ((!canClobber(SuccSU, DUSU) ||
1758 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
1759 (!SU->isCommutable && SuccSU->isCommutable)) &&
1760 !scheduleDAG->IsReachable(SuccSU, SU)) {
1761 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
1762 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
1763 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1764 /*Reg=*/0, /*isNormalMemory=*/false,
1765 /*isMustAlias=*/false,
1766 /*isArtificial=*/true));
1773 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1774 /// scheduling units.
1776 void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1777 SethiUllmanNumbers.assign(SUnits->size(), 0);
1779 for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1780 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1783 /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
1784 /// predecessors of the successors of the SUnit SU. Stop when the provided
1785 /// limit is exceeded.
1786 static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1789 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1791 const SUnit *SuccSU = I->getSUnit();
1792 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1793 EE = SuccSU->Preds.end(); II != EE; ++II) {
1794 SUnit *PredSU = II->getSUnit();
1795 if (!PredSU->isScheduled)
1805 bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1806 unsigned LPriority = SPQ->getNodePriority(left);
1807 unsigned RPriority = SPQ->getNodePriority(right);
1808 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1809 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
1810 bool LIsFloater = LIsTarget && left->NumPreds == 0;
1811 bool RIsFloater = RIsTarget && right->NumPreds == 0;
1812 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1813 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1815 if (left->NumSuccs == 0 && right->NumSuccs != 0)
1817 else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1824 if (left->NumSuccs == 1)
1826 if (right->NumSuccs == 1)
1829 if (LPriority+LBonus != RPriority+RBonus)
1830 return LPriority+LBonus < RPriority+RBonus;
1832 if (left->getDepth() != right->getDepth())
1833 return left->getDepth() < right->getDepth();
1835 if (left->NumSuccsLeft != right->NumSuccsLeft)
1836 return left->NumSuccsLeft > right->NumSuccsLeft;
1838 assert(left->NodeQueueId && right->NodeQueueId &&
1839 "NodeQueueId cannot be zero");
1840 return (left->NodeQueueId > right->NodeQueueId);
1843 //===----------------------------------------------------------------------===//
1844 // Public Constructor Functions
1845 //===----------------------------------------------------------------------===//
1847 llvm::ScheduleDAGSDNodes *
1848 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1849 const TargetMachine &TM = IS->TM;
1850 const TargetInstrInfo *TII = TM.getInstrInfo();
1851 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1853 BURegReductionPriorityQueue *PQ =
1854 new BURegReductionPriorityQueue(*IS->MF, true, TII, TRI, 0);
1855 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
1856 PQ->setScheduleDAG(SD);
1860 llvm::ScheduleDAGSDNodes *
1861 llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1862 const TargetMachine &TM = IS->TM;
1863 const TargetInstrInfo *TII = TM.getInstrInfo();
1864 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1866 TDRegReductionPriorityQueue *PQ =
1867 new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
1868 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
1869 PQ->setScheduleDAG(SD);
1873 llvm::ScheduleDAGSDNodes *
1874 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1875 const TargetMachine &TM = IS->TM;
1876 const TargetInstrInfo *TII = TM.getInstrInfo();
1877 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1879 SrcRegReductionPriorityQueue *PQ =
1880 new SrcRegReductionPriorityQueue(*IS->MF, true, TII, TRI, 0);
1881 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
1882 PQ->setScheduleDAG(SD);
1886 llvm::ScheduleDAGSDNodes *
1887 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1888 const TargetMachine &TM = IS->TM;
1889 const TargetInstrInfo *TII = TM.getInstrInfo();
1890 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1891 const TargetLowering *TLI = &IS->getTargetLowering();
1893 HybridBURRPriorityQueue *PQ =
1894 new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI,
1895 (RegPressureAware ? TLI : 0));
1896 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
1897 PQ->setScheduleDAG(SD);