1 //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
16 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "pre-RA-sched"
19 #include "ScheduleDAGSDNodes.h"
20 #include "llvm/InlineAsm.h"
21 #include "llvm/CodeGen/SchedulerRegistry.h"
22 #include "llvm/CodeGen/SelectionDAGISel.h"
23 #include "llvm/Target/TargetRegisterInfo.h"
24 #include "llvm/Target/TargetData.h"
25 #include "llvm/Target/TargetMachine.h"
26 #include "llvm/Target/TargetInstrInfo.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/raw_ostream.h"
// Scheduler statistics, reported when LLVM is run with -stats.
36 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
37 STATISTIC(NumUnfolds, "Number of nodes unfolded");
38 STATISTIC(NumDups, "Number of duplicated nodes");
39 STATISTIC(NumPRCopies, "Number of physical register copies");
// Register the four list-scheduler variants with the scheduler registry so
// they can be selected by name on the command line.
41 static RegisterScheduler
42 burrListDAGScheduler("list-burr",
43 "Bottom-up register reduction list scheduling",
44 createBURRListDAGScheduler);
45 static RegisterScheduler
46 tdrListrDAGScheduler("list-tdrr",
47 "Top-down register reduction list scheduling",
48 createTDRRListDAGScheduler);
49 static RegisterScheduler
50 sourceListDAGScheduler("source",
51 "Similar to list-burr but schedules in source "
52 "order when possible",
53 createSourceListDAGScheduler);
55 static RegisterScheduler
56 hybridListDAGScheduler("list-hybrid",
57 "Bottom-up rr list scheduling which avoid stalls for "
58 "long latency instructions",
59 createHybridListDAGScheduler);
62 //===----------------------------------------------------------------------===//
63 /// ScheduleDAGRRList - The actual register reduction list scheduler
64 /// implementation. This supports both top-down and bottom-up scheduling.
66 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
68 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
72 /// NeedLatency - True if the scheduler will make use of latency information.
76 /// AvailableQueue - The priority queue to use for the available SUnits.
// NOTE: owned by this scheduler; deleted in the destructor below.
77 SchedulingPriorityQueue *AvailableQueue;
79 /// LiveRegDefs - A set of physical registers and their definition
80 /// that are "live". These nodes must be scheduled before any other nodes that
81 /// modifies the registers can be scheduled.
// Both vectors are indexed by physical register number; LiveRegCycles records
// the cycle at which the corresponding register became live.
83 std::vector<SUnit*> LiveRegDefs;
84 std::vector<unsigned> LiveRegCycles;
86 /// Topo - A topological ordering for SUnits which permits fast IsReachable
87 /// and similar queries.
88 ScheduleDAGTopologicalSort Topo;
// Construct a scheduler; takes ownership of availqueue.
91 ScheduleDAGRRList(MachineFunction &mf,
92 bool isbottomup, bool needlatency,
93 SchedulingPriorityQueue *availqueue)
94 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
95 AvailableQueue(availqueue), Topo(SUnits) {
98 ~ScheduleDAGRRList() {
99 delete AvailableQueue;
104 /// IsReachable - Checks if SU is reachable from TargetSU.
105 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
106 return Topo.IsReachable(SU, TargetSU);
109 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
111 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
112 return Topo.WillCreateCycle(SU, TargetSU);
115 /// AddPred - adds a predecessor edge to SUnit SU.
116 /// This returns true if this is a new predecessor.
117 /// Updates the topological ordering if required.
118 void AddPred(SUnit *SU, const SDep &D) {
119 Topo.AddPred(SU, D.getSUnit());
123 /// RemovePred - removes a predecessor edge from SUnit SU.
124 /// This returns true if an edge was removed.
125 /// Updates the topological ordering if required.
126 void RemovePred(SUnit *SU, const SDep &D) {
127 Topo.RemovePred(SU, D.getSUnit());
// Private scheduling helpers; definitions follow below in this file.
132 void ReleasePred(SUnit *SU, const SDep *PredEdge);
133 void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
134 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
135 void ReleaseSuccessors(SUnit *SU);
136 void CapturePred(SDep *PredEdge);
137 void ScheduleNodeBottomUp(SUnit*, unsigned);
138 void ScheduleNodeTopDown(SUnit*, unsigned);
139 void UnscheduleNodeBottomUp(SUnit*);
140 void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
141 SUnit *CopyAndMoveSuccessors(SUnit*);
142 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
143 const TargetRegisterClass*,
144 const TargetRegisterClass*,
145 SmallVector<SUnit*, 2>&);
146 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
147 void ListScheduleTopDown();
148 void ListScheduleBottomUp();
151 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
152 /// Updates the topological ordering if required.
153 SUnit *CreateNewSUnit(SDNode *N) {
154 unsigned NumSUnits = SUnits.size();
155 SUnit *NewNode = NewSUnit(N);
156 // Update the topological ordering.
// Only needed when the new node got a NodeNum past the old end, i.e. the
// SUnits vector actually grew.
157 if (NewNode->NodeNum >= NumSUnits)
158 Topo.InitDAGTopologicalSorting();
162 /// CreateClone - Creates a new SUnit from an existing one.
163 /// Updates the topological ordering if required.
164 SUnit *CreateClone(SUnit *N) {
165 unsigned NumSUnits = SUnits.size();
166 SUnit *NewNode = Clone(N);
167 // Update the topological ordering.
168 if (NewNode->NodeNum >= NumSUnits)
169 Topo.InitDAGTopologicalSorting();
173 /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
174 /// need actual latency information but the hybrid scheduler does.
175 bool ForceUnitLatencies() const {
179 } // end anonymous namespace
182 /// Schedule - Schedule the DAG using list scheduling.
183 void ScheduleDAGRRList::Schedule() {
184 DEBUG(dbgs() << "********** List Scheduling **********\n");
187 LiveRegDefs.resize(TRI->getNumRegs(), NULL);
188 LiveRegCycles.resize(TRI->getNumRegs(), 0);
190 // Build the scheduling graph.
191 BuildSchedGraph(NULL);
193 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
194 SUnits[su].dumpAll(this));
195 Topo.InitDAGTopologicalSorting();
197 AvailableQueue->initNodes(SUnits);
199 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
201 ListScheduleBottomUp();
203 ListScheduleTopDown();
205 AvailableQueue->releaseState();
208 //===----------------------------------------------------------------------===//
209 // Bottom-Up Scheduling
210 //===----------------------------------------------------------------------===//
212 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
213 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
214 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
215 SUnit *PredSU = PredEdge->getSUnit();
218 if (PredSU->NumSuccsLeft == 0) {
219 dbgs() << "*** Scheduling failed! ***\n";
221 dbgs() << " has been released too many times!\n";
225 --PredSU->NumSuccsLeft;
227 if (!ForceUnitLatencies()) {
228 // Updating predecessor's height. This is now the cycle when the
229 // predecessor can be scheduled without causing a pipeline stall.
230 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
233 // If all the node's successors are scheduled, this node is ready
234 // to be scheduled. Ignore the special EntrySU node.
235 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
236 PredSU->isAvailable = true;
237 AvailableQueue->push(PredSU);
// ReleasePredecessors - Release all predecessors of SU, recording any
// assigned-physreg dependencies in the live register tracking vectors.
241 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
242 // Bottom up: release predecessors
243 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
245 ReleasePred(SU, &*I);
246 if (I->isAssignedRegDep()) {
247 // This is a physical register dependency and it's impossible or
248 // expensive to copy the register. Make sure nothing that can
249 // clobber the register is scheduled between the predecessor and
// Only record the first definition seen for each register.
251 if (!LiveRegDefs[I->getReg()]) {
253 LiveRegDefs[I->getReg()] = I->getSUnit();
254 LiveRegCycles[I->getReg()] = CurCycle;
260 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
261 /// count of its predecessors. If a predecessor pending count is zero, add it to
262 /// the Available queue.
263 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
264 DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
265 DEBUG(SU->dump(this));
268 if (CurCycle < SU->getHeight())
269 DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
272 // FIXME: Handle noop hazard.
273 SU->setHeightToAtLeast(CurCycle);
274 Sequence.push_back(SU);
276 ReleasePredecessors(SU, CurCycle);
278 // Release all the implicit physical register defs that are live.
279 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
281 if (I->isAssignedRegDep()) {
282 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
283 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
284 assert(LiveRegDefs[I->getReg()] == SU &&
285 "Physical register dependency violated?");
287 LiveRegDefs[I->getReg()] = NULL;
288 LiveRegCycles[I->getReg()] = 0;
293 SU->isScheduled = true;
294 AvailableQueue->ScheduledNode(SU);
297 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
298 /// unscheduled, incrcease the succ left count of its predecessors. Remove
299 /// them from AvailableQueue if necessary.
300 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
301 SUnit *PredSU = PredEdge->getSUnit();
302 if (PredSU->isAvailable) {
303 PredSU->isAvailable = false;
304 if (!PredSU->isPending)
305 AvailableQueue->remove(PredSU);
308 assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
309 ++PredSU->NumSuccsLeft;
312 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
313 /// its predecessor states to reflect the change.
314 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
315 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
316 DEBUG(SU->dump(this));
318 AvailableQueue->UnscheduledNode(SU);
320 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
323 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
324 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
325 assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
326 "Physical register dependency violated?");
328 LiveRegDefs[I->getReg()] = NULL;
329 LiveRegCycles[I->getReg()] = 0;
333 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
335 if (I->isAssignedRegDep()) {
336 if (!LiveRegDefs[I->getReg()]) {
337 LiveRegDefs[I->getReg()] = SU;
340 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
341 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
345 SU->setHeightDirty();
346 SU->isScheduled = false;
347 SU->isAvailable = true;
348 AvailableQueue->push(SU);
351 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
352 /// BTCycle in order to schedule a specific node.
353 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
354 unsigned &CurCycle) {
356 while (CurCycle > BtCycle) {
357 OldSU = Sequence.back();
359 if (SU->isSucc(OldSU))
360 // Don't try to remove SU from AvailableQueue.
361 SU->isAvailable = false;
362 UnscheduleNodeBottomUp(OldSU);
364 AvailableQueue->setCurCycle(CurCycle);
367 assert(!SU->isSucc(OldSU) && "Something is wrong!");
// isOperandOf - Returns whether N uses any node in SU's flagged-node chain
// as an operand.
372 static bool isOperandOf(const SUnit *SU, SDNode *N) {
373 for (const SDNode *SUNode = SU->getNode(); SUNode;
374 SUNode = SUNode->getFlaggedNode()) {
375 if (SUNode->isOperandOf(N))
381 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
382 /// successors to the newly created node.
383 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
384 if (SU->getNode()->getFlaggedNode())
387 SDNode *N = SU->getNode();
392 bool TryUnfold = false;
393 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
394 EVT VT = N->getValueType(i);
397 else if (VT == MVT::Other)
400 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
401 const SDValue &Op = N->getOperand(i);
402 EVT VT = Op.getNode()->getValueType(Op.getResNo());
408 SmallVector<SDNode*, 2> NewNodes;
409 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
412 DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
413 assert(NewNodes.size() == 2 && "Expected a load folding node!");
416 SDNode *LoadNode = NewNodes[0];
417 unsigned NumVals = N->getNumValues();
418 unsigned OldNumVals = SU->getNode()->getNumValues();
419 for (unsigned i = 0; i != NumVals; ++i)
420 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
421 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
422 SDValue(LoadNode, 1));
424 // LoadNode may already exist. This can happen when there is another
425 // load from the same location and producing the same type of value
426 // but it has different alignment or volatileness.
427 bool isNewLoad = true;
429 if (LoadNode->getNodeId() != -1) {
430 LoadSU = &SUnits[LoadNode->getNodeId()];
433 LoadSU = CreateNewSUnit(LoadNode);
434 LoadNode->setNodeId(LoadSU->NodeNum);
435 ComputeLatency(LoadSU);
438 SUnit *NewSU = CreateNewSUnit(N);
439 assert(N->getNodeId() == -1 && "Node already inserted!");
440 N->setNodeId(NewSU->NodeNum);
442 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
443 for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
444 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
445 NewSU->isTwoAddress = true;
449 if (TID.isCommutable())
450 NewSU->isCommutable = true;
451 ComputeLatency(NewSU);
453 // Record all the edges to and from the old SU, by category.
454 SmallVector<SDep, 4> ChainPreds;
455 SmallVector<SDep, 4> ChainSuccs;
456 SmallVector<SDep, 4> LoadPreds;
457 SmallVector<SDep, 4> NodePreds;
458 SmallVector<SDep, 4> NodeSuccs;
459 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
462 ChainPreds.push_back(*I);
463 else if (isOperandOf(I->getSUnit(), LoadNode))
464 LoadPreds.push_back(*I);
466 NodePreds.push_back(*I);
468 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
471 ChainSuccs.push_back(*I);
473 NodeSuccs.push_back(*I);
476 // Now assign edges to the newly-created nodes.
477 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
478 const SDep &Pred = ChainPreds[i];
479 RemovePred(SU, Pred);
481 AddPred(LoadSU, Pred);
483 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
484 const SDep &Pred = LoadPreds[i];
485 RemovePred(SU, Pred);
487 AddPred(LoadSU, Pred);
489 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
490 const SDep &Pred = NodePreds[i];
491 RemovePred(SU, Pred);
492 AddPred(NewSU, Pred);
494 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
495 SDep D = NodeSuccs[i];
496 SUnit *SuccDep = D.getSUnit();
498 RemovePred(SuccDep, D);
502 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
503 SDep D = ChainSuccs[i];
504 SUnit *SuccDep = D.getSUnit();
506 RemovePred(SuccDep, D);
513 // Add a data dependency to reflect that NewSU reads the value defined
515 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
518 AvailableQueue->addNode(LoadSU);
519 AvailableQueue->addNode(NewSU);
523 if (NewSU->NumSuccsLeft == 0) {
524 NewSU->isAvailable = true;
530 DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
531 NewSU = CreateClone(SU);
533 // New SUnit has the exact same predecessors.
534 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
536 if (!I->isArtificial())
539 // Only copy scheduled successors. Cut them from old node's successor
540 // list and move them over.
541 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
542 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
544 if (I->isArtificial())
546 SUnit *SuccSU = I->getSUnit();
547 if (SuccSU->isScheduled) {
552 DelDeps.push_back(std::make_pair(SuccSU, D));
555 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
556 RemovePred(DelDeps[i].first, DelDeps[i].second);
558 AvailableQueue->updateNode(SU);
559 AvailableQueue->addNode(NewSU);
565 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
566 /// scheduled successors of the given SUnit to the last copy.
567 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
568 const TargetRegisterClass *DestRC,
569 const TargetRegisterClass *SrcRC,
570 SmallVector<SUnit*, 2> &Copies) {
571 SUnit *CopyFromSU = CreateNewSUnit(NULL);
572 CopyFromSU->CopySrcRC = SrcRC;
573 CopyFromSU->CopyDstRC = DestRC;
575 SUnit *CopyToSU = CreateNewSUnit(NULL);
576 CopyToSU->CopySrcRC = DestRC;
577 CopyToSU->CopyDstRC = SrcRC;
579 // Only copy scheduled successors. Cut them from old node's successor
580 // list and move them over.
581 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
582 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
584 if (I->isArtificial())
586 SUnit *SuccSU = I->getSUnit();
587 if (SuccSU->isScheduled) {
589 D.setSUnit(CopyToSU);
591 DelDeps.push_back(std::make_pair(SuccSU, *I));
594 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
595 RemovePred(DelDeps[i].first, DelDeps[i].second);
597 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
598 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
600 AvailableQueue->updateNode(SU);
601 AvailableQueue->addNode(CopyFromSU);
602 AvailableQueue->addNode(CopyToSU);
603 Copies.push_back(CopyFromSU);
604 Copies.push_back(CopyToSU);
609 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
610 /// definition of the specified node.
611 /// FIXME: Move to SelectionDAG?
// getPhysicalRegisterVT - Returns the ValueType of the physical register
// definition of the specified node.
// FIXME: Move to SelectionDAG?
612 static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
613 const TargetInstrInfo *TII) {
614 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
615 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
616 unsigned NumRes = TID.getNumDefs();
// Scan the implicit-def list to find Reg's position after explicit defs.
617 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
622 return N->getValueType(NumRes);
625 /// CheckForLiveRegDef - Return true and update live register vector if the
626 /// specified register def of the specified SUnit clobbers any "live" registers.
627 static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
628 std::vector<SUnit*> &LiveRegDefs,
629 SmallSet<unsigned, 4> &RegAdded,
630 SmallVector<unsigned, 4> &LRegs,
631 const TargetRegisterInfo *TRI) {
633 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
634 if (RegAdded.insert(Reg)) {
635 LRegs.push_back(Reg);
639 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
640 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
641 if (RegAdded.insert(*Alias)) {
642 LRegs.push_back(*Alias);
649 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
650 /// scheduling of the given node to satisfy live physical register dependencies.
651 /// If the specific node is the last one that's available to schedule, do
652 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
// scheduling of the given node to satisfy live physical register
// dependencies; the conflicting registers are returned in LRegs.
653 bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
654 SmallVector<unsigned, 4> &LRegs){
// Fast path: no live registers means no possible conflict.
655 if (NumLiveRegs == 0)
658 SmallSet<unsigned, 4> RegAdded;
659 // If this node would clobber any "live" register, then it's not ready.
660 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
662 if (I->isAssignedRegDep())
663 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
664 RegAdded, LRegs, TRI);
// Walk the whole flagged-node chain; any node in it may clobber registers.
667 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
668 if (Node->getOpcode() == ISD::INLINEASM) {
669 // Inline asm can clobber physical defs.
670 unsigned NumOps = Node->getNumOperands();
671 if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
672 --NumOps; // Ignore the flag operand.
674 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
676 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
677 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
679 ++i; // Skip the ID value.
680 if (InlineAsm::isRegDefKind(Flags) ||
681 InlineAsm::isRegDefEarlyClobberKind(Flags)) {
682 // Check for def of register or earlyclobber register.
683 for (; NumVals; --NumVals, ++i) {
684 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
685 if (TargetRegisterInfo::isPhysicalRegister(Reg))
686 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
// For machine instructions, implicit defs can clobber live registers.
694 if (!Node->isMachineOpcode())
696 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
697 if (!TID.ImplicitDefs)
699 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
700 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
702 return !LRegs.empty();
706 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
708 void ScheduleDAGRRList::ListScheduleBottomUp() {
709 unsigned CurCycle = 0;
711 // Release any predecessors of the special Exit node.
712 ReleasePredecessors(&ExitSU, CurCycle);
714 // Add root to Available queue.
715 if (!SUnits.empty()) {
716 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
717 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
718 RootSU->isAvailable = true;
719 AvailableQueue->push(RootSU);
722 // While Available queue is not empty, grab the node with the highest
723 // priority. If it is not ready put it back. Schedule the node.
724 SmallVector<SUnit*, 4> NotReady;
725 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
726 Sequence.reserve(SUnits.size());
727 while (!AvailableQueue->empty()) {
728 bool Delayed = false;
730 SUnit *CurSU = AvailableQueue->pop();
732 SmallVector<unsigned, 4> LRegs;
733 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
736 LRegsMap.insert(std::make_pair(CurSU, LRegs));
738 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
739 NotReady.push_back(CurSU);
740 CurSU = AvailableQueue->pop();
743 // All candidates are delayed due to live physical reg dependencies.
744 // Try backtracking, code duplication, or inserting cross class copies
746 if (Delayed && !CurSU) {
747 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
748 SUnit *TrySU = NotReady[i];
749 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
751 // Try unscheduling up to the point where it's safe to schedule
753 unsigned LiveCycle = CurCycle;
754 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
755 unsigned Reg = LRegs[j];
756 unsigned LCycle = LiveRegCycles[Reg];
757 LiveCycle = std::min(LiveCycle, LCycle);
759 SUnit *OldSU = Sequence[LiveCycle];
760 if (!WillCreateCycle(TrySU, OldSU)) {
761 BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
762 // Force the current node to be scheduled before the node that
763 // requires the physical reg dep.
764 if (OldSU->isAvailable) {
765 OldSU->isAvailable = false;
766 AvailableQueue->remove(OldSU);
768 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
769 /*Reg=*/0, /*isNormalMemory=*/false,
770 /*isMustAlias=*/false, /*isArtificial=*/true));
771 // If one or more successors has been unscheduled, then the current
772 // node is no longer avaialable. Schedule a successor that's now
773 // available instead.
774 if (!TrySU->isAvailable)
775 CurSU = AvailableQueue->pop();
778 TrySU->isPending = false;
779 NotReady.erase(NotReady.begin()+i);
786 // Can't backtrack. If it's too expensive to copy the value, then try
787 // duplicate the nodes that produces these "too expensive to copy"
788 // values to break the dependency. In case even that doesn't work,
789 // insert cross class copies.
790 // If it's not too expensive, i.e. cost != -1, issue copies.
791 SUnit *TrySU = NotReady[0];
792 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
793 assert(LRegs.size() == 1 && "Can't handle this yet!");
794 unsigned Reg = LRegs[0];
795 SUnit *LRDef = LiveRegDefs[Reg];
796 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
797 const TargetRegisterClass *RC =
798 TRI->getPhysicalRegisterRegClass(Reg, VT);
799 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
801 // If cross copy register class is null, then it must be possible copy
802 // the value directly. Do not try duplicate the def.
805 NewDef = CopyAndMoveSuccessors(LRDef);
809 // Issue copies, these can be expensive cross register class copies.
810 SmallVector<SUnit*, 2> Copies;
811 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
812 DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
813 << " to SU #" << Copies.front()->NodeNum << "\n");
814 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
815 /*Reg=*/0, /*isNormalMemory=*/false,
816 /*isMustAlias=*/false,
817 /*isArtificial=*/true));
818 NewDef = Copies.back();
821 DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
822 << " to SU #" << TrySU->NodeNum << "\n");
823 LiveRegDefs[Reg] = NewDef;
824 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
825 /*Reg=*/0, /*isNormalMemory=*/false,
826 /*isMustAlias=*/false,
827 /*isArtificial=*/true));
828 TrySU->isAvailable = false;
832 assert(CurSU && "Unable to resolve live physical register dependencies!");
835 // Add the nodes that aren't ready back onto the available list.
836 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
837 NotReady[i]->isPending = false;
838 // May no longer be available due to backtracking.
839 if (NotReady[i]->isAvailable)
840 AvailableQueue->push(NotReady[i]);
845 ScheduleNodeBottomUp(CurSU, CurCycle);
847 AvailableQueue->setCurCycle(CurCycle);
850 // Reverse the order if it is bottom up.
851 std::reverse(Sequence.begin(), Sequence.end());
854 VerifySchedule(isBottomUp);
858 //===----------------------------------------------------------------------===//
859 // Top-Down Scheduling
860 //===----------------------------------------------------------------------===//
862 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
863 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
864 void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
865 SUnit *SuccSU = SuccEdge->getSUnit();
868 if (SuccSU->NumPredsLeft == 0) {
869 dbgs() << "*** Scheduling failed! ***\n";
871 dbgs() << " has been released too many times!\n";
875 --SuccSU->NumPredsLeft;
877 // If all the node's predecessors are scheduled, this node is ready
878 // to be scheduled. Ignore the special ExitSU node.
879 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
880 SuccSU->isAvailable = true;
881 AvailableQueue->push(SuccSU);
// ReleaseSuccessors - Release all successors of SU (top-down direction).
885 void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
886 // Top down: release successors
887 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
// Top-down scheduling has no live-physreg tracking, so physreg
// dependencies are rejected outright.
889 assert(!I->isAssignedRegDep() &&
890 "The list-tdrr scheduler doesn't yet support physreg dependencies!");
892 ReleaseSucc(SU, &*I);
896 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
897 /// count of its successors. If a successor pending count is zero, add it to
898 /// the Available queue.
899 void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
900 DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
901 DEBUG(SU->dump(this));
903 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
904 SU->setDepthToAtLeast(CurCycle);
905 Sequence.push_back(SU);
907 ReleaseSuccessors(SU);
908 SU->isScheduled = true;
909 AvailableQueue->ScheduledNode(SU);
912 /// ListScheduleTopDown - The main loop of list scheduling for top-down
914 void ScheduleDAGRRList::ListScheduleTopDown() {
915 unsigned CurCycle = 0;
916 AvailableQueue->setCurCycle(CurCycle);
918 // Release any successors of the special Entry node.
919 ReleaseSuccessors(&EntrySU);
921 // All leaves to Available queue.
922 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
923 // It is available if it has no predecessors.
924 if (SUnits[i].Preds.empty()) {
925 AvailableQueue->push(&SUnits[i]);
926 SUnits[i].isAvailable = true;
930 // While Available queue is not empty, grab the node with the highest
931 // priority. If it is not ready put it back. Schedule the node.
932 Sequence.reserve(SUnits.size());
933 while (!AvailableQueue->empty()) {
934 SUnit *CurSU = AvailableQueue->pop();
937 ScheduleNodeTopDown(CurSU, CurCycle);
939 AvailableQueue->setCurCycle(CurCycle);
943 VerifySchedule(isBottomUp);
948 //===----------------------------------------------------------------------===//
949 // RegReductionPriorityQueue Implementation
950 //===----------------------------------------------------------------------===//
952 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
953 // to reduce register pressure.
957 class RegReductionPriorityQueue;
959 /// Sorting functions for the Available queue.
// Each comparator corresponds to one registered scheduler variant:
// bu = bottom-up, td = top-down, src = source-order, hybrid = latency-aware.
// Each holds a back-pointer to its owning priority queue (SPQ).
960 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
961 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
962 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
963 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
965 bool operator()(const SUnit* left, const SUnit* right) const;
968 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
969 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
970 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
971 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
973 bool operator()(const SUnit* left, const SUnit* right) const;
976 struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
977 RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
978 src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
980 src_ls_rr_sort(const src_ls_rr_sort &RHS)
983 bool operator()(const SUnit* left, const SUnit* right) const;
986 struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
987 RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
988 hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
990 hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
993 bool operator()(const SUnit* left, const SUnit* right) const;
995 } // end anonymous namespace
997 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
998 /// Smaller number is the higher priority.
// Recursively computed over data predecessors and memoized in SUNumbers
// (0 means "not yet computed").
1000 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1001 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1002 if (SethiUllmanNumber != 0)
1003 return SethiUllmanNumber;
1006 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1008 if (I->isCtrl()) continue; // ignore chain preds
1009 SUnit *PredSU = I->getSUnit();
1010 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
// Take the maximum over predecessors; ties cost an extra register.
1011 if (PredSethiUllman > SethiUllmanNumber) {
1012 SethiUllmanNumber = PredSethiUllman;
1014 } else if (PredSethiUllman == SethiUllmanNumber)
1018 SethiUllmanNumber += Extra;
// Leaf nodes (no data preds) get the minimum nonzero number.
1020 if (SethiUllmanNumber == 0)
1021 SethiUllmanNumber = 1;
1023 return SethiUllmanNumber;
// RegReductionPriorityQueue - SchedulingPriorityQueue implementation that
// ranks nodes by Sethi-Ullman number, with the templated comparator
// ('Picker') choosing the best node at pop time.
// NOTE(review): the template header and the 'Picker' member declaration are
// elided from this excerpt.
1028 class RegReductionPriorityQueue : public SchedulingPriorityQueue {
1029 std::vector<SUnit*> Queue;
// Monotonically increasing id assigned at push; comparators use it as the
// final deterministic tiebreaker.
1031 unsigned CurQueueId;
1034 // SUnits - The SUnits for the current graph.
1035 std::vector<SUnit> *SUnits;
1037 const TargetInstrInfo *TII;
1038 const TargetRegisterInfo *TRI;
1039 ScheduleDAGRRList *scheduleDAG;
1041 // SethiUllmanNumbers - The SethiUllman number for each node.
1042 std::vector<unsigned> SethiUllmanNumbers;
1045 RegReductionPriorityQueue(const TargetInstrInfo *tii,
1046 const TargetRegisterInfo *tri)
1047 : Picker(this), CurQueueId(0),
1048 TII(tii), TRI(tri), scheduleDAG(NULL) {}
// initNodes - Called once per graph: massage the DAG with the two pseudo-edge
// transformations below, then number every node.
1050 void initNodes(std::vector<SUnit> &sunits) {
1052 // Add pseudo dependency edges for two-address nodes.
1053 AddPseudoTwoAddrDeps();
1054 // Reroute edges to nodes with multiple uses.
1055 PrescheduleNodesWithMultipleUses();
1056 // Calculate node priorities.
1057 CalculateSethiUllmanNumbers();
// addNode - A node was created after initNodes; grow the number table
// (doubling, zero-filled) if needed and compute the new node's number.
1060 void addNode(const SUnit *SU) {
1061 unsigned SUSize = SethiUllmanNumbers.size();
1062 if (SUnits->size() > SUSize)
1063 SethiUllmanNumbers.resize(SUSize*2, 0);
1064 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
// updateNode - Invalidate one node's number (0 = "unknown") and recompute it.
1067 void updateNode(const SUnit *SU) {
1068 SethiUllmanNumbers[SU->NodeNum] = 0;
1069 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1072 void releaseState() {
1074 SethiUllmanNumbers.clear();
// getNodePriority - Sethi-Ullman number with special cases for copies,
// subreg pseudos, and pure-sink/pure-source nodes.
// NOTE(review): the special-case return values are elided from this excerpt.
1077 unsigned getNodePriority(const SUnit *SU) const {
1078 assert(SU->NodeNum < SethiUllmanNumbers.size());
1079 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1080 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1081 // CopyToReg should be close to its uses to facilitate coalescing and
1084 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1085 Opc == TargetOpcode::SUBREG_TO_REG ||
1086 Opc == TargetOpcode::INSERT_SUBREG)
1087 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1088 // close to their uses to facilitate coalescing.
1090 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1091 // If SU does not have a register use, i.e. it doesn't produce a value
1092 // that would be consumed (e.g. store), then it terminates a chain of
1093 // computation. Give it a large SethiUllman number so it will be
1094 // scheduled right before its predecessors that it doesn't lengthen
1095 // their live ranges.
1097 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1098 // If SU does not have a register def, schedule it close to its uses
1099 // because it does not lengthen any live ranges.
1101 return SethiUllmanNumbers[SU->NodeNum];
// getNodeOrdering - Source-order number recorded for SU's SDNode; used by
// the src_ls_rr_sort ("source") comparator.
1104 unsigned getNodeOrdering(const SUnit *SU) const {
1105 return scheduleDAG->DAG->GetOrdering(SU->getNode());
1108 bool empty() const { return Queue.empty(); }
1110 void push(SUnit *U) {
1111 assert(!U->NodeQueueId && "Node in the queue already");
1112 U->NodeQueueId = ++CurQueueId;
// Linear scan for the best node per Picker, then swap-with-back so removal
// from the vector is O(1). NOTE(review): the method signature (presumably
// pop) is elided from this excerpt.
1117 if (empty()) return NULL;
1118 std::vector<SUnit *>::iterator Best = Queue.begin();
1119 for (std::vector<SUnit *>::iterator I = next(Queue.begin()),
1120 E = Queue.end(); I != E; ++I)
1121 if (Picker(*Best, *I))
1124 if (Best != prior(Queue.end()))
1125 std::swap(*Best, Queue.back());
// remove - Locate SU, swap it to the back for O(1) erase, and clear its
// queue id so it may be pushed again later.
1131 void remove(SUnit *SU) {
1132 assert(!Queue.empty() && "Queue is empty!");
1133 assert(SU->NodeQueueId != 0 && "Not in queue!");
1134 std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1136 if (I != prior(Queue.end()))
1137 std::swap(*I, Queue.back());
1139 SU->NodeQueueId = 0;
1142 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1143 scheduleDAG = scheduleDag;
// Helpers implemented out-of-line below.
1147 bool canClobber(const SUnit *SU, const SUnit *Op);
1148 void AddPseudoTwoAddrDeps();
1149 void PrescheduleNodesWithMultipleUses();
1150 void CalculateSethiUllmanNumbers();
// Convenience typedefs binding RegReductionPriorityQueue to each of the four
// comparison functors (bottom-up, top-down, source-order, hybrid).
1153 typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1154 BURegReductionPriorityQueue;
1156 typedef RegReductionPriorityQueue<td_ls_rr_sort>
1157 TDRegReductionPriorityQueue;
1159 typedef RegReductionPriorityQueue<src_ls_rr_sort>
1160 SrcRegReductionPriorityQueue;
1162 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1163 HybridBURRPriorityQueue;
1166 /// closestSucc - Returns the scheduled cycle of the successor which is
1167 /// closest to the current cycle.
1168 static unsigned closestSucc(const SUnit *SU) {
1169 unsigned MaxHeight = 0;
1170 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1172 if (I->isCtrl()) continue; // ignore chain succs
1173 unsigned Height = I->getSUnit()->getHeight();
1174 // If there are bunch of CopyToRegs stacked up, they should be considered
1175 // to be at the same position.
1176 if (I->getSUnit()->getNode() &&
1177 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
// Recurse through the copy so a stack of CopyToRegs counts as one position.
1178 Height = closestSucc(I->getSUnit())+1;
1179 if (Height > MaxHeight)
1185 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
1186 /// for scratch registers, i.e. number of data dependencies.
1187 static unsigned calcMaxScratches(const SUnit *SU) {
1188 unsigned Scratches = 0;
1189 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1191 if (I->isCtrl()) continue; // ignore chain preds
// BURRSort - Common bottom-up tie-breaking chain shared by the bu/src/hybrid
// comparators: Sethi-Ullman priority first, then closest-successor distance,
// scratch-register estimate, height, depth, and finally the queue id.
1197 template <typename RRSort>
1198 static bool BURRSort(const SUnit *left, const SUnit *right,
1199 const RegReductionPriorityQueue<RRSort> *SPQ) {
1200 unsigned LPriority = SPQ->getNodePriority(left);
1201 unsigned RPriority = SPQ->getNodePriority(right);
1202 if (LPriority != RPriority)
1203 return LPriority > RPriority;
1205 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
// NOTE(review): the worked example that originally accompanied this comment
// is elided from this excerpt.
1210 // and the following instructions are both ready.
1214 // Then schedule t2 = op first.
1221 // This creates more short live intervals.
1222 unsigned LDist = closestSucc(left);
1223 unsigned RDist = closestSucc(right);
1225 return LDist < RDist;
1227 // How many registers becomes live when the node is scheduled.
1228 unsigned LScratch = calcMaxScratches(left);
1229 unsigned RScratch = calcMaxScratches(right);
1230 if (LScratch != RScratch)
1231 return LScratch > RScratch;
1233 if (left->getHeight() != right->getHeight())
1234 return left->getHeight() > right->getHeight();
1236 if (left->getDepth() != right->getDepth())
1237 return left->getDepth() < right->getDepth();
// Queue ids are unique and nonzero, so this final comparison makes the
// ordering a deterministic total order.
1239 assert(left->NodeQueueId && right->NodeQueueId &&
1240 "NodeQueueId cannot be zero");
1241 return (left->NodeQueueId > right->NodeQueueId);
// Plain bottom-up register-reduction order: defer entirely to BURRSort.
1245 bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1246 return BURRSort(left, right, SPQ);
1249 // Source order, otherwise bottom up.
1250 bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
// NOTE(review): an order number of 0 presumably means no source ordering was
// recorded for that node (see getNodeOrdering) — confirm against GetOrdering.
1251 unsigned LOrder = SPQ->getNodeOrdering(left);
1252 unsigned ROrder = SPQ->getNodeOrdering(right);
1254 // Prefer an ordering where the lower the non-zero order number, the higher
1256 if ((LOrder || ROrder) && LOrder != ROrder)
1257 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
// When source orders are equal (or both absent), fall back to the standard
// bottom-up register-reduction heuristics.
1259 return BURRSort(left, right, SPQ);
// Hybrid order: avoid pipeline stalls first, then reduce register pressure.
1262 bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
// A node "stalls" if it prefers latency scheduling and its height has not
// yet been reached by the current cycle.
1263 bool LStall = left->SchedulingPref == Sched::Latency &&
1264 SPQ->getCurCycle() < left->getHeight();
1265 bool RStall = right->SchedulingPref == Sched::Latency &&
1266 SPQ->getCurCycle() < right->getHeight();
1267 // If scheduling one of the node will cause a pipeline stall, delay it.
1268 // If scheduling either one of the node will cause a pipeline stall, sort them
1269 // according to their height.
1270 // If neither will cause a pipeline stall, try to reduce register pressure.
// NOTE(review): the stall-comparison branches that use LStall/RStall are
// elided from this excerpt; only the height comparison is visible here.
1274 if (left->getHeight() != right->getHeight())
1275 return left->getHeight() > right->getHeight();
1278 return BURRSort(left, right, SPQ);
// canClobber - Return true when SU is a two-address instruction one of whose
// tied (def&use) operands comes from the same original node as Op, i.e.
// scheduling SU could clobber the value Op produces.
// NOTE(review): the template header of this member definition is elided
// from this excerpt.
1283 RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1284 if (SU->isTwoAddress) {
1285 unsigned Opc = SU->getNode()->getMachineOpcode();
1286 const TargetInstrDesc &TID = TII->get(Opc);
1287 unsigned NumRes = TID.getNumDefs();
1288 unsigned NumOps = TID.getNumOperands() - NumRes;
1289 for (unsigned i = 0; i != NumOps; ++i) {
// Only operands tied to a def can clobber their source.
1290 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1291 SDNode *DU = SU->getNode()->getOperand(i).getNode();
1292 if (DU->getNodeId() != -1 &&
1293 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1301 /// hasCopyToRegUse - Return true if SU has a value successor that is a
1303 static bool hasCopyToRegUse(const SUnit *SU) {
// Scan data (non-chain) successors for a CopyToReg node.
1304 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1306 if (I->isCtrl()) continue;
1307 const SUnit *SuccSU = I->getSUnit();
1308 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
1314 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1315 /// physical register defs.
1316 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1317 const TargetInstrInfo *TII,
1318 const TargetRegisterInfo *TRI) {
1319 SDNode *N = SuccSU->getNode();
1320 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1321 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1322 assert(ImpDefs && "Caller should check hasPhysRegDefs");
// Walk SU and every node flag-glued to it; check each machine node's
// implicit defs for overlap with SuccSU's implicit defs.
1323 for (const SDNode *SUNode = SU->getNode(); SUNode;
1324 SUNode = SUNode->getFlaggedNode()) {
1325 if (!SUNode->isMachineOpcode())
1327 const unsigned *SUImpDefs =
1328 TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
// N's result values past its explicit defs correspond to implicit defs;
// skip flag/chain results and results nobody uses.
1331 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1332 EVT VT = N->getValueType(i);
1333 if (VT == MVT::Flag || VT == MVT::Other)
1335 if (!N->hasAnyUseOfValue(i))
1337 unsigned Reg = ImpDefs[i - NumDefs];
// Implicit-def register lists are zero-terminated.
1338 for (;*SUImpDefs; ++SUImpDefs) {
1339 unsigned SUReg = *SUImpDefs;
1340 if (TRI->regsOverlap(Reg, SUReg))
1348 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1349 /// are not handled well by the general register pressure reduction
1350 /// heuristics. When presented with code like this:
// NOTE(review): the ASCII-art DAG diagrams that originally illustrated this
// transformation are elided from this excerpt.
1359 /// the heuristics tend to push the store up, but since the
1360 /// operand of the store has another use (U), this would increase
1361 /// the length of that other use (the U->N edge).
1363 /// This function transforms code like the above to route U's
1364 /// dependence through the store when possible, like this:
1375 /// This results in the store being scheduled immediately
1376 /// after N, which shortens the U->N live range, reducing
1377 /// register pressure.
1380 void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1381 // Visit all the nodes in topological order, working top-down.
1382 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1383 SUnit *SU = &(*SUnits)[i];
1384 // For now, only look at nodes with no data successors, such as stores.
1385 // These are especially important, due to the heuristics in
1386 // getNodePriority for nodes with no data successors.
1387 if (SU->NumSuccs != 0)
1389 // For now, only look at nodes with exactly one data predecessor.
1390 if (SU->NumPreds != 1)
1392 // Avoid prescheduling copies to virtual registers, which don't behave
1393 // like other nodes from the perspective of scheduling heuristics.
1394 if (SDNode *N = SU->getNode())
1395 if (N->getOpcode() == ISD::CopyToReg &&
1396 TargetRegisterInfo::isVirtualRegister
1397 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1400 // Locate the single data predecessor.
// NOTE(review): PredSU's declaration/initialization is elided from this
// excerpt.
1402 for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1403 EE = SU->Preds.end(); II != EE; ++II)
1404 if (!II->isCtrl()) {
1405 PredSU = II->getSUnit();
1410 // Don't rewrite edges that carry physregs, because that requires additional
1411 // support infrastructure.
1412 if (PredSU->hasPhysRegDefs)
1414 // Short-circuit the case where SU is PredSU's only data successor.
1415 if (PredSU->NumSuccs == 1)
1417 // Avoid prescheduling to copies from virtual registers, which don't behave
1418 // like other nodes from the perspective of scheduling heuristics.
1419 if (SDNode *N = SU->getNode())
1420 if (N->getOpcode() == ISD::CopyFromReg &&
1421 TargetRegisterInfo::isVirtualRegister
1422 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1425 // Perform checks on the successors of PredSU.
1426 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1427 EE = PredSU->Succs.end(); II != EE; ++II) {
1428 SUnit *PredSuccSU = II->getSUnit();
1429 if (PredSuccSU == SU) continue;
1430 // If PredSU has another successor with no data successors, for
1431 // now don't attempt to choose either over the other.
1432 if (PredSuccSU->NumSuccs == 0)
1433 goto outer_loop_continue;
1434 // Don't break physical register dependencies.
1435 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1436 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1437 goto outer_loop_continue;
1438 // Don't introduce graph cycles.
1439 if (scheduleDAG->IsReachable(SU, PredSuccSU))
1440 goto outer_loop_continue;
1443 // Ok, the transformation is safe and the heuristics suggest it is
1444 // profitable. Update the graph.
1445 DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
1446 << " next to PredSU #" << PredSU->NodeNum
1447 << " to guide scheduling in the presence of multiple uses\n");
// Reroute each of PredSU's other successor edges so that it hangs off SU
// instead, making SU the gateway to PredSU's value.
1448 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1449 SDep Edge = PredSU->Succs[i];
1450 assert(!Edge.isAssignedRegDep());
1451 SUnit *SuccSU = Edge.getSUnit();
1453 Edge.setSUnit(PredSU);
1454 scheduleDAG->RemovePred(SuccSU, Edge);
1455 scheduleDAG->AddPred(SU, Edge);
1457 scheduleDAG->AddPred(SuccSU, Edge);
1461 outer_loop_continue:;
1465 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
1466 /// it as a def&use operand, add a pseudo control edge from it to the other
1467 /// node (if it won't create a cycle) so the two-address one will be scheduled
1468 /// first (lower in the schedule). If both nodes are two-address, favor the
1469 /// one that has a CopyToReg use (more likely to be a loop induction update).
1470 /// If both are two-address, but one is commutable while the other is not
1471 /// commutable, favor the one that's not commutable.
1473 void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1474 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1475 SUnit *SU = &(*SUnits)[i];
1476 if (!SU->isTwoAddress)
// Skip nodes glued to others: the extra edge would constrain the whole
// glued group.
1479 SDNode *Node = SU->getNode();
1480 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
1483 unsigned Opc = Node->getMachineOpcode();
1484 const TargetInstrDesc &TID = TII->get(Opc);
1485 unsigned NumRes = TID.getNumDefs();
1486 unsigned NumOps = TID.getNumOperands() - NumRes;
// For each tied (def&use) operand of SU, examine the other users of the
// node that produces that operand.
1487 for (unsigned j = 0; j != NumOps; ++j) {
1488 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1490 SDNode *DU = SU->getNode()->getOperand(j).getNode();
1491 if (DU->getNodeId() == -1)
1493 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1494 if (!DUSU) continue;
1495 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1496 E = DUSU->Succs.end(); I != E; ++I) {
1497 if (I->isCtrl()) continue;
1498 SUnit *SuccSU = I->getSUnit();
1501 // Be conservative. Ignore if nodes aren't at roughly the same
1502 // depth and height.
1503 if (SuccSU->getHeight() < SU->getHeight() &&
1504 (SU->getHeight() - SuccSU->getHeight()) > 1)
1506 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1507 // constrains whatever is using the copy, instead of the copy
1508 // itself. In the case that the copy is coalesced, this
1509 // preserves the intent of the pseudo two-address heuristics.
1510 while (SuccSU->Succs.size() == 1 &&
1511 SuccSU->getNode()->isMachineOpcode() &&
1512 SuccSU->getNode()->getMachineOpcode() ==
1513 TargetOpcode::COPY_TO_REGCLASS)
1514 SuccSU = SuccSU->Succs.front().getSUnit();
1515 // Don't constrain non-instruction nodes.
1516 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1518 // Don't constrain nodes with physical register defs if the
1519 // predecessor can clobber them.
1520 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1521 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1524 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1525 // these may be coalesced away. We want them close to their uses.
1526 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1527 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
1528 SuccOpc == TargetOpcode::INSERT_SUBREG ||
1529 SuccOpc == TargetOpcode::SUBREG_TO_REG)
// Add the artificial ordering edge only when it cannot create a cycle.
1531 if ((!canClobber(SuccSU, DUSU) ||
1532 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
1533 (!SU->isCommutable && SuccSU->isCommutable)) &&
1534 !scheduleDAG->IsReachable(SuccSU, SU)) {
1535 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
1536 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
1537 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1538 /*Reg=*/0, /*isNormalMemory=*/false,
1539 /*isMustAlias=*/false,
1540 /*isArtificial=*/true));
1547 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1548 /// scheduling units.
1550 void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
// Reset every number to the "unknown" sentinel (0), then compute each one.
// CalcNodeSethiUllmanNumber memoizes, so shared predecessors are only
// visited once across the whole pass.
1551 SethiUllmanNumbers.assign(SUnits->size(), 0);
1553 for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1554 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1557 /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
1558 /// predecessors of the successors of the SUnit SU. Stop when the provided
1559 /// limit is exceeded.
// NOTE(review): the limit parameter, the accumulator, and the early-exit
// logic are elided from this excerpt.
1560 static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1563 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1565 const SUnit *SuccSU = I->getSUnit();
1566 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1567 EE = SuccSU->Preds.end(); II != EE; ++II) {
1568 SUnit *PredSU = II->getSUnit();
// Count only predecessors that have not yet been scheduled.
1569 if (!PredSU->isScheduled)
// Top-down register-reduction order.
// NOTE(review): several branches of this comparator (the uses of
// LIsFloater/RIsFloater and the priority/bonus adjustments) are elided from
// this excerpt.
1579 bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1580 unsigned LPriority = SPQ->getNodePriority(left);
1581 unsigned RPriority = SPQ->getNodePriority(right);
// "Target" = concrete machine instruction; a "floater" is a machine node
// with no predecessors at all.
1582 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1583 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
1584 bool LIsFloater = LIsTarget && left->NumPreds == 0;
1585 bool RIsFloater = RIsTarget && right->NumPreds == 0;
// A node whose successors collectively have exactly one unscheduled
// predecessor (i.e. this node) gets a bonus of 2.
1586 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1587 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1589 if (left->NumSuccs == 0 && right->NumSuccs != 0)
1591 else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1598 if (left->NumSuccs == 1)
1600 if (right->NumSuccs == 1)
1603 if (LPriority+LBonus != RPriority+RBonus)
1604 return LPriority+LBonus < RPriority+RBonus;
1606 if (left->getDepth() != right->getDepth())
1607 return left->getDepth() < right->getDepth();
1609 if (left->NumSuccsLeft != right->NumSuccsLeft)
1610 return left->NumSuccsLeft > right->NumSuccsLeft;
// Unique nonzero queue ids guarantee a deterministic total order.
1612 assert(left->NodeQueueId && right->NodeQueueId &&
1613 "NodeQueueId cannot be zero");
1614 return (left->NodeQueueId > right->NodeQueueId);
1617 //===----------------------------------------------------------------------===//
1618 // Public Constructor Functions
1619 //===----------------------------------------------------------------------===//
// createBURRListDAGScheduler - Factory registered above as "list-burr":
// bottom-up register-reduction list scheduler.
1621 llvm::ScheduleDAGSDNodes *
1622 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1623 const TargetMachine &TM = IS->TM;
1624 const TargetInstrInfo *TII = TM.getInstrInfo();
1625 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1627 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
// NOTE(review): the two boolean ctor args are presumably (isBottomUp=true,
// needLatency=false) — confirm against ScheduleDAGRRList's constructor.
// Queue and DAG hold mutual pointers; the DAG takes ownership of PQ.
1629 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
1630 PQ->setScheduleDAG(SD);
// createTDRRListDAGScheduler - Factory registered above as "list-tdrr":
// top-down register-reduction list scheduler.
1634 llvm::ScheduleDAGSDNodes *
1635 llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1636 const TargetMachine &TM = IS->TM;
1637 const TargetInstrInfo *TII = TM.getInstrInfo();
1638 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1640 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
// NOTE(review): boolean ctor args here are (false, false) — top-down,
// no latency; confirm against ScheduleDAGRRList's constructor.
1642 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
1643 PQ->setScheduleDAG(SD);
// createSourceListDAGScheduler - Factory registered above as "source":
// bottom-up scheduler that prefers the original source (IR) order, falling
// back to register reduction (see src_ls_rr_sort).
1647 llvm::ScheduleDAGSDNodes *
1648 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1649 const TargetMachine &TM = IS->TM;
1650 const TargetInstrInfo *TII = TM.getInstrInfo();
1651 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1653 SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
// Same (true, false) flags as the plain bottom-up scheduler above.
1655 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
1656 PQ->setScheduleDAG(SD);
1660 llvm::ScheduleDAGSDNodes *
1661 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1662 const TargetMachine &TM = IS->TM;
1663 const TargetInstrInfo *TII = TM.getInstrInfo();
1664 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1666 HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);
1668 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
1669 PQ->setScheduleDAG(SD);