1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the ResourcePriorityQueue class, which is a
11 // SchedulingPriorityQueue that prioritizes instructions using DFA state to
12 // reduce the length of the critical path through the basic block
14 // The scheduler is basically a top-down adaptable list scheduler with DFA
15 // resource tracking added to the cost function.
16 // The DFA is queried as a state machine to model "packets/bundles" during
17 // scheduling. Currently packets/bundles are discarded at the end of
18 // scheduling, affecting only the order of instructions.
20 //===----------------------------------------------------------------------===//
22 #define DEBUG_TYPE "scheduler"
23 #include "llvm/CodeGen/ResourcePriorityQueue.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/SelectionDAGNodes.h"
29 #include "llvm/Target/TargetMachine.h"
30 #include "llvm/Target/TargetLowering.h"
// Hidden boolean flag "disable-dfa-sched" (default: false). pop() consults it
// to choose between the DFA-based cost function and the default top-down
// picker.
34 static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
35 cl::ZeroOrMore, cl::init(false),
36 cl::desc("Disable use of DFA during scheduling"));
// Hidden signed option "dfa-sched-reg-pressure-threshold" (default: 5).
// SUSchedulingCost() switches to its register-pressure-aware heuristic once
// HorizontalVerticalBalance exceeds this value.
38 static cl::opt<signed> RegPressureThreshold(
39 "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
40 cl::desc("Track reg pressure and switch priority to in-depth"));
// Builds the queue for instruction selector IS: caches the target's
// itinerary, instruction, register and lowering info, creates the target
// schedule state used for resource tracking, and sets up the per-register-
// class pressure tables.
43 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
45 InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData())
47 TII = IS->getTargetLowering().getTargetMachine().getInstrInfo();
48 TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo();
49 TLI = &IS->getTargetLowering();
51 const TargetMachine &tm = (*IS->MF).getTarget();
52 ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
53 // This hard requirement could be relaxed, but for now
54 // do not let it proceed.
55 assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
// One RegLimit and one RegPressure slot per register class, all starting at
// zero; each limit is then overwritten with the value the target reports
// through getRegPressureLimit() for that class (queried with *IS->MF).
57 unsigned NumRC = TRI->getNumRegClasses();
58 RegLimit.resize(NumRC);
59 RegPressure.resize(NumRC);
60 std::fill(RegLimit.begin(), RegLimit.end(), 0);
61 std::fill(RegPressure.begin(), RegPressure.end(), 0);
62 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
63 E = TRI->regclass_end(); I != E; ++I)
64 RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
// Both scheduling-balance counters start at zero.
66 ParallelLiveRanges = 0;
67 HorizontalVerticalBalance = 0;
// Accumulates a dependence count (NumberDeps) over the predecessors of SU for
// register class RCId. The opcode switch counts CopyFromReg predecessors and
// leaves TokenFactor, CopyToReg and INLINEASM uncounted; the value types
// produced by a predecessor are then compared against RCId through
// TLI->getRegClassFor(VT)->getID(), considering legal types only.
71 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
72 unsigned NumberDeps = 0;
73 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
78 SUnit *PredSU = I->getSUnit();
79 const SDNode *ScegN = PredSU->getNode();
84 // If value is passed to CopyToReg, it is probably
86 switch (ScegN->getOpcode()) {
88 case ISD::TokenFactor: break;
89 case ISD::CopyFromReg: NumberDeps++; break;
90 case ISD::CopyToReg: break;
91 case ISD::INLINEASM: break;
93 if (!ScegN->isMachineOpcode())
// Inspect every value type the predecessor node produces.
96 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
97 EVT VT = ScegN->getValueType(i);
98 if (TLI->isTypeLegal(VT)
99 && (TLI->getRegClassFor(VT)->getID() == RCId)) {
// Successor-side counterpart of numberRCValPredInSU: accumulates a dependence
// count (NumberDeps) over the successors of SU. Here the opcode switch counts
// CopyToReg successors and leaves TokenFactor, CopyFromReg and INLINEASM
// uncounted; the types of a successor's operands are then compared against
// RCId through TLI->getRegClassFor(VT)->getID(), considering legal types only.
108 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
110 unsigned NumberDeps = 0;
111 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
116 SUnit *SuccSU = I->getSUnit();
117 const SDNode *ScegN = SuccSU->getNode();
121 // If value is passed to CopyToReg, it is probably
123 switch (ScegN->getOpcode()) {
125 case ISD::TokenFactor: break;
126 case ISD::CopyFromReg: break;
127 case ISD::CopyToReg: NumberDeps++; break;
128 case ISD::INLINEASM: break;
130 if (!ScegN->isMachineOpcode())
// Inspect every operand of the successor node; the type examined is that of
// the specific result (Op.getResNo()) of the node feeding the operand.
133 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
134 const SDValue &Op = ScegN->getOperand(i);
135 EVT VT = Op.getNode()->getValueType(Op.getResNo());
136 if (TLI->isTypeLegal(VT)
137 && (TLI->getRegClassFor(VT)->getID() == RCId)) {
// File-local helper that walks the successor edges of SU, starting from a
// zero NumberDeps counter. ScheduledNode() subtracts its result from
// SU->Succs.size() when updating HorizontalVerticalBalance.
// NOTE(review): by its name this counts control-dependence successor edges —
// confirm against the loop body.
146 static unsigned numberCtrlDepsInSU(SUnit *SU) {
147 unsigned NumberDeps = 0;
148 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
// File-local helper that walks the predecessor edges of SU, starting from a
// zero NumberDeps counter. ScheduledNode() subtracts its result from
// SU->Preds.size() when updating HorizontalVerticalBalance.
// NOTE(review): by its name this counts control-dependence predecessor edges —
// confirm against the loop body.
156 static unsigned numberCtrlPredInSU(SUnit *SU) {
157 unsigned NumberDeps = 0;
158 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
167 /// Initialize nodes: size the per-node NumNodesSolelyBlocking table and
/// compute NumRegDefsLeft for every scheduling unit.
169 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
// One "solely blocking" counter per scheduling unit; newly added entries
// are initialized to 0.
171 NumNodesSolelyBlocking.resize(SUnits->size(), 0);
// Per-unit setup: record how many register definitions each unit has left.
173 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
174 SUnit *SU = &(*SUnits)[i];
175 initNumRegDefsLeft(SU);
180 /// This heuristic is used if DFA scheduling is not desired
181 /// for some VLIW platform.
/// Compares two scheduling units by, in order: the isScheduleHigh flag,
/// latency, number of solely blocked nodes, and finally node number.
/// For the latency, blocked-count and node-number criteria the result is
/// true when the LHS value is the smaller one.
182 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
183 // The isScheduleHigh flag allows nodes with wraparound dependencies that
184 // cannot easily be modeled as edges with latencies to be scheduled as
185 // soon as possible in a top-down schedule.
186 if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
189 if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
192 unsigned LHSNum = LHS->NodeNum;
193 unsigned RHSNum = RHS->NodeNum;
195 // The most important heuristic is scheduling the critical path.
196 unsigned LHSLatency = PQ->getLatency(LHSNum);
197 unsigned RHSLatency = PQ->getLatency(RHSNum);
198 if (LHSLatency < RHSLatency) return true;
199 if (LHSLatency > RHSLatency) return false;
201 // After that, if two nodes have identical latencies, look to see if one will
202 // unblock more other nodes than the other.
203 unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
204 unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
205 if (LHSBlocked < RHSBlocked) return true;
206 if (LHSBlocked > RHSBlocked) return false;
208 // Finally, just to provide a stable ordering, use the node number as a
210 return LHSNum < RHSNum;
214 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
215 /// of SU, return it, otherwise return null.
/// Several edges leading to the same unscheduled predecessor still count as
/// a single predecessor, because candidates are compared by address.
216 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
217 SUnit *OnlyAvailablePred = 0;
218 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
220 SUnit &Pred = *I->getSUnit();
221 if (!Pred.isScheduled) {
222 // We found an available, but not scheduled, predecessor. If it's the
223 // only one we have found, keep track of it... otherwise give up.
224 if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
226 OnlyAvailablePred = &Pred;
// Null when no unscheduled predecessor was seen.
229 return OnlyAvailablePred;
// Records, for SU, the number of its successors whose single unscheduled
// predecessor is SU itself. The value is stored in NumNodesSolelyBlocking
// under SU's node number, where SUSchedulingCost() reads it.
232 void ResourcePriorityQueue::push(SUnit *SU) {
233 // Look at all of the successors of this node. Count the number of nodes that
234 // this node is the sole unscheduled node for.
235 unsigned NumNodesBlocking = 0;
236 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
238 if (getSingleUnscheduledPred(I->getSUnit()) == SU)
241 NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
245 /// Check if scheduling of this SU is possible
246 /// in the current packet.
/// Two things are examined: whether ResourcesModel can reserve resources for
/// SU's machine opcode, and whether SU is a direct successor of a unit that
/// is already in Packet.
247 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
// Null units and units without a node are handled up front.
248 if (!SU || !SU->getNode())
251 // If this is a compound instruction,
252 // it is likely to be a call. Do not delay it.
253 if (SU->getNode()->getGluedNode())
256 // First see if the pipeline could receive this instruction
257 // in the current cycle.
258 if (SU->getNode()->isMachineOpcode())
259 switch (SU->getNode()->getMachineOpcode()) {
261 if (!ResourcesModel->canReserveResources(&TII->get(
262 SU->getNode()->getMachineOpcode())))
// Target-independent opcodes that the switch singles out.
264 case TargetOpcode::EXTRACT_SUBREG:
265 case TargetOpcode::INSERT_SUBREG:
266 case TargetOpcode::SUBREG_TO_REG:
267 case TargetOpcode::REG_SEQUENCE:
268 case TargetOpcode::IMPLICIT_DEF:
272 // Now see if there are no other dependencies
273 // to instructions already in the packet.
274 for (unsigned i = 0, e = Packet.size(); i != e; ++i)
275 for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
276 E = Packet[i]->Succs.end(); I != E; ++I) {
277 // Since we do not add pseudos to packets, might as well
// SU is a direct successor of a unit already in the packet.
282 if (I->getSUnit() == SU)
289 /// Keep track of available resources.
/// Reserves resources for SU in ResourcesModel and appends SU to Packet.
/// The resource state is cleared when SU does not fit or has a glued node,
/// for pseudo ops, and when the packet reaches InstrItins->IssueWidth.
290 void ResourcePriorityQueue::reserveResources(SUnit *SU) {
291 // If this SU does not fit in the packet
293 if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
294 ResourcesModel->clearResources();
// Machine opcodes reserve resources through their instruction description;
// the target-independent opcodes listed below are singled out by the switch.
298 if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
299 switch (SU->getNode()->getMachineOpcode()) {
301 ResourcesModel->reserveResources(&TII->get(
302 SU->getNode()->getMachineOpcode()));
304 case TargetOpcode::EXTRACT_SUBREG:
305 case TargetOpcode::INSERT_SUBREG:
306 case TargetOpcode::SUBREG_TO_REG:
307 case TargetOpcode::REG_SEQUENCE:
308 case TargetOpcode::IMPLICIT_DEF:
311 Packet.push_back(SU);
313 // Forcefully end packet for PseudoOps.
315 ResourcesModel->clearResources();
319 // If packet is now full, reset the state so in the next cycle
321 if (Packet.size() >= InstrItins->IssueWidth) {
322 ResourcesModel->clearResources();
// Computes a signed register balance (RegBalance) for SU restricted to
// register class RCId: each value SU defines whose legal type maps to RCId
// adds numberRCValSuccInSU(SU, RCId); each operand whose legal type maps to
// RCId subtracts numberRCValPredInSU(SU, RCId).
327 signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
328 signed RegBalance = 0;
// Null units, units without a node and non-machine nodes are handled up
// front.
330 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
// Definitions: values produced by SU's node.
334 for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
335 EVT VT = SU->getNode()->getValueType(i);
336 if (TLI->isTypeLegal(VT)
337 && TLI->getRegClassFor(VT)
338 && TLI->getRegClassFor(VT)->getID() == RCId)
339 RegBalance += numberRCValSuccInSU(SU, RCId);
// Uses: operands consumed by SU's node; constant operands are tested for
// separately.
342 for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
343 const SDValue &Op = SU->getNode()->getOperand(i);
344 EVT VT = Op.getNode()->getValueType(Op.getResNo());
345 if (isa<ConstantSDNode>(Op.getNode()))
348 if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
349 && TLI->getRegClassFor(VT)->getID() == RCId)
350 RegBalance -= numberRCValPredInSU(SU, RCId);
355 /// Estimates change in reg pressure from this SU.
356 /// It is achieved by trivial tracking of defined
357 /// and used vregs in dependent instructions.
358 /// The RawPressure flag makes this function ignore
359 /// existing reg file sizes, and report raw def/use
361 signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
362 signed RegBalance = 0;
// Null units, units without a node and non-machine nodes are handled up
// front.
364 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
// Sum of the raw per-class deltas over every register class.
368 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
369 E = TRI->regclass_end(); I != E; ++I) {
370 const TargetRegisterClass *RC = *I;
371 RegBalance += rawRegPressureDelta(SU, RC->getID());
// Limit-aware sum: a class contributes its delta only when the resulting
// pressure is positive and reaches that class's RegLimit.
// NOTE(review): rawRegPressureDelta(SU, RC->getID()) is evaluated up to three
// times per class here; a local would avoid the repeated work.
375 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
376 E = TRI->regclass_end(); I != E; ++I) {
377 const TargetRegisterClass *RC = *I;
378 if ((RegPressure[RC->getID()] +
379 rawRegPressureDelta(SU, RC->getID()) > 0) &&
380 (RegPressure[RC->getID()] +
381 rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
382 RegBalance += rawRegPressureDelta(SU, RC->getID());
389 // Constants used to denote relative importance of
390 // heuristic components for cost computation.
// Priority* values are added to the cost as fixed bonuses, Scale* values
// multiply a measured quantity (height, blocked nodes, register pressure
// delta, number of values), and FactorOne is the left-shift amount applied
// when resources are available. All are consumed by SUSchedulingCost().
391 static const unsigned PriorityOne = 200;
392 static const unsigned PriorityTwo = 100;
393 static const unsigned PriorityThree = 50;
394 static const unsigned PriorityFour = 15;
395 static const unsigned PriorityFive = 5;
396 static const unsigned ScaleOne = 20;
397 static const unsigned ScaleTwo = 10;
398 static const unsigned ScaleThree = 5;
399 static const unsigned FactorOne = 2;
401 /// Returns a single number reflecting the benefit of scheduling SU
402 /// in the current cycle.
/// The cost is accumulated in ResCount from: the isScheduleHigh flag, the
/// unit's height, resource availability, the estimated register pressure
/// change, and opcode-specific bonuses for SU's node and the nodes glued
/// to it.
403 signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
404 // Initial trivial priority.
407 // Do not waste time on a node that is already scheduled.
411 // Forced priority is high.
412 if (SU->isScheduleHigh)
413 ResCount += PriorityOne;
415 // Adaptable scheduling
416 // A small, but very parallel
417 // region, where reg pressure is an issue.
418 if (HorizontalVerticalBalance > RegPressureThreshold) {
419 // Critical path first
420 ResCount += (SU->getHeight() * ScaleTwo);
421 // If resources are available for it, multiply the
422 // chance of scheduling.
423 if (isResourceAvailable(SU))
424 ResCount <<= FactorOne;
426 // Consider change to reg pressure from scheduling
// Raw pressure (second argument true) is used in this mode, weighted by
// ScaleOne.
428 ResCount -= (regPressureDelta(SU,true) * ScaleOne);
430 // Default heuristic, greedy and
431 // critical path driven.
433 // Critical path first.
434 ResCount += (SU->getHeight() * ScaleTwo);
435 // Now see how many instructions are blocked by this SU.
436 ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
437 // If resources are available for it, multiply the
438 // chance of scheduling.
439 if (isResourceAvailable(SU))
440 ResCount <<= FactorOne;
// Pressure change computed with regPressureDelta's default RawPressure
// argument, weighted by ScaleTwo.
442 ResCount -= (regPressureDelta(SU) * ScaleTwo);
445 // These are platform specific things.
446 // Will need to go into the back end
447 // and accessed from here via a hook.
448 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
449 if (N->isMachineOpcode()) {
450 const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
// Bonus that grows with the number of values the node produces.
452 ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
455 switch (N->getOpcode()) {
457 case ISD::TokenFactor:
458 case ISD::CopyFromReg:
460 ResCount += PriorityFive;
464 ResCount += PriorityFour;
472 /// Main resource tracking point.
/// Called with the unit that was just scheduled: updates the per-class
/// register pressure estimate, reserves resources for SU, re-prioritizes
/// predecessors of SU's successors, and updates the ParallelLiveRanges and
/// HorizontalVerticalBalance counters.
473 void ResourcePriorityQueue::ScheduledNode(SUnit *SU) {
474 // Use NULL entry as an event marker to reset
477 ResourcesModel->clearResources();
482 const SDNode *ScegN = SU->getNode();
483 // Update reg pressure tracking.
484 // First update current node.
485 if (ScegN->isMachineOpcode()) {
486 // Estimate generated regs.
487 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
488 EVT VT = ScegN->getValueType(i);
490 if (TLI->isTypeLegal(VT)) {
491 const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
493 RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
496 // Estimate killed regs.
497 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
498 const SDValue &Op = ScegN->getOperand(i);
499 EVT VT = Op.getNode()->getValueType(Op.getResNo());
501 if (TLI->isTypeLegal(VT)) {
502 const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
// Subtract the killed count, but clamp the pressure at zero instead of
// letting it go below.
504 if (RegPressure[RC->getID()] >
505 (numberRCValPredInSU(SU, RC->getID())))
506 RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
507 else RegPressure[RC->getID()] = 0;
// Walk SU's predecessors and decrement NumRegDefsLeft on them; control edges
// and predecessors whose count is already zero are tested for first.
511 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
513 if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
515 --I->getSUnit()->NumRegDefsLeft;
519 // Reserve resources for this SU.
520 reserveResources(SU);
522 // Adjust number of parallel live ranges.
523 // Heuristic is simple - node with no data successors reduces
524 // number of live ranges. All others, increase it.
525 unsigned NumberNonControlDeps = 0;
527 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
529 adjustPriorityOfUnscheduledPreds(I->getSUnit());
531 NumberNonControlDeps++;
// No data successors: shrink ParallelLiveRanges by SU->NumPreds, clamping at
// zero.
534 if (!NumberNonControlDeps) {
535 if (ParallelLiveRanges >= SU->NumPreds)
536 ParallelLiveRanges -= SU->NumPreds;
538 ParallelLiveRanges = 0;
542 ParallelLiveRanges += SU->NumRegDefsLeft;
544 // Track parallel live chains.
// The balance grows by the number of successor edges not reported by
// numberCtrlDepsInSU and shrinks by the number of predecessor edges not
// reported by numberCtrlPredInSU.
545 HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
546 HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
// Computes SU->NumRegDefsLeft by walking SU's node and every node glued to
// it. For machine opcodes the value is bounded by both the node's number of
// values and the instruction description's number of defs; IMPLICIT_DEF and
// some non-machine opcodes (e.g. CopyFromReg) are special-cased.
549 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
550 unsigned NodeNumDefs = 0;
551 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
552 if (N->isMachineOpcode()) {
553 const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
554 // No register need be allocated for this.
555 if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
559 NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
562 switch(N->getOpcode()) {
564 case ISD::CopyFromReg:
572 SU->NumRegDefsLeft = NodeNumDefs;
575 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
576 /// scheduled. If SU is not itself available, then there is at least one
577 /// predecessor node that has not been scheduled yet. If SU has exactly ONE
578 /// unscheduled predecessor, we want to increase its priority: it getting
579 /// scheduled will make this node available, so it is better than some other
580 /// node of the same priority that will not make a node available.
581 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
582 if (SU->isAvailable) return; // All preds scheduled.
// Nothing to re-prioritize unless there is exactly one unscheduled
// predecessor and that predecessor is itself available.
584 SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
585 if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable)
588 // Okay, we found a single predecessor that is available, but not scheduled.
589 // Since it is available, it must be in the priority queue. First remove it.
590 remove(OnlyAvailablePred);
592 // Reinsert the node into the priority queue, which recomputes its
593 // NumNodesSolelyBlocking value.
594 push(OnlyAvailablePred);
598 /// Main access point - returns next instructions
599 /// to be placed in scheduling sequence.
/// The candidate is selected with SUSchedulingCost() unless the
/// "disable-dfa-sched" flag is set, in which case Picker is used. The
/// selected entry is swapped to the back of Queue.
600 SUnit *ResourcePriorityQueue::pop() {
// Start with the first queue entry as the best candidate.
604 std::vector<SUnit *>::iterator Best = Queue.begin();
605 if (!DisableDFASched) {
606 signed BestCost = SUSchedulingCost(*Best);
607 for (std::vector<SUnit *>::iterator I = Queue.begin(),
608 E = Queue.end(); I != E; ++I) {
// Strictly greater cost wins, so the earliest entry is kept on ties.
// NOTE(review): SUSchedulingCost(*I) is evaluated twice for each new best
// candidate; caching it in a local would avoid the repeated work.
612 if (SUSchedulingCost(*I) > BestCost) {
613 BestCost = SUSchedulingCost(*I);
618 // Use default TD scheduling mechanism.
620 for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
621 E = Queue.end(); I != E; ++I)
622 if (Picker(*Best, *I))
// Move the chosen entry to the back of the vector (no-op if it is already
// the last element).
627 if (Best != prior(Queue.end()))
628 std::swap(*Best, Queue.back());
// Locates SU in Queue with a linear search and swaps it to the back of the
// vector. The queue must not be empty (asserted).
// NOTE(review): the std::find result is not compared against Queue.end()
// before being dereferenced in the swap; if SU is not in the queue this
// dereferences the end iterator — confirm callers guarantee membership.
636 void ResourcePriorityQueue::remove(SUnit *SU) {
637 assert(!Queue.empty() && "Queue is empty!");
638 std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
639 if (I != prior(Queue.end()))
640 std::swap(*I, Queue.back());
// Empty dump() variant: the body performs no output and ignores DAG.
647 void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
649 void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
650 ResourcePriorityQueue q = *this;
653 dbgs() << "Height " << su->getHeight() << ": ";