1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the ResourcePriorityQueue class, which is a
11 // SchedulingPriorityQueue that prioritizes instructions using DFA state to
12 // reduce the length of the critical path through the basic block
14 // The scheduler is basically a top-down adaptable list scheduler with DFA
15 // resource tracking added to the cost function.
16 // DFA is queried as a state machine to model "packets/bundles" during
17 // scheduling. Currently packets/bundles are discarded at the end of
18 // scheduling, affecting only order of instructions.
20 //===----------------------------------------------------------------------===//
22 #include "llvm/CodeGen/ResourcePriorityQueue.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/SelectionDAGNodes.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/Target/TargetLowering.h"
29 #include "llvm/Target/TargetMachine.h"
30 #include "llvm/Target/TargetSubtargetInfo.h"
34 #define DEBUG_TYPE "scheduler"
// Hidden boolean option, default false. pop() consults it: when it is not
// set the queue picks the entry with the highest SUSchedulingCost; when it is
// set the Picker comparison is used instead.
36 static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
37 cl::ZeroOrMore, cl::init(false),
38 cl::desc("Disable use of DFA during scheduling"));
// Hidden signed option, default 5. SUSchedulingCost compares
// HorizontalVerticalBalance against this value to choose between its two
// cost heuristics.
40 static cl::opt<signed> RegPressureThreshold(
41 "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
42 cl::desc("Track reg pressure and switch priority to in-depth"));
/// Constructs the queue for the instruction selector \p IS.
/// Caches the target register info (TRI), target lowering (TLI) and
/// instruction info (TII), creates ResourcesModel through
/// TII->CreateTargetScheduleState, and sets up the per-register-class
/// RegLimit / RegPressure tables.
/// NOTE(review): this listing omits some original lines (the embedded
/// numbering jumps, e.g. 45 -> 48 and 64 -> 66), so parts of the initializer
/// list and the loop header are not visible here.
44 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
45 : Picker(this), InstrItins(IS->getTargetLowering()
48 ->getInstrItineraryData()) {
49 const TargetMachine &TM = (*IS->MF).getTarget();
50 TRI = TM.getSubtargetImpl()->getRegisterInfo();
51 TLI = IS->getTargetLowering();
52 TII = TM.getSubtargetImpl()->getInstrInfo();
53 ResourcesModel = TII->CreateTargetScheduleState(&TM, nullptr);
54 // This hard requirement could be relaxed, but for now
55 // do not let it proceed.
56 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
// One slot per target register class in both tables, all starting at zero.
58 unsigned NumRC = TRI->getNumRegClasses();
59 RegLimit.resize(NumRC);
60 RegPressure.resize(NumRC);
61 std::fill(RegLimit.begin(), RegLimit.end(), 0);
62 std::fill(RegPressure.begin(), RegPressure.end(), 0);
// Record the pressure limit TRI reports for each register class in this
// function, indexed by the class ID.
63 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
64 E = TRI->regclass_end();
66 RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
// Both balance counters start from zero; scheduledNode() updates them.
68 ParallelLiveRanges = 0;
69 HorizontalVerticalBalance = 0;
/// Walks the predecessor edges of \p SU and accumulates a dependency count
/// (NumberDeps) related to register class \p RCId.
/// Visible rules: a CopyFromReg predecessor always adds one; TokenFactor,
/// CopyToReg and INLINEASM predecessors add nothing; for other predecessors
/// each value the node produces is checked for a legal type whose register
/// class ID equals \p RCId.
/// NOTE(review): the return type line, the loop condition, the bodies of the
/// two `if` statements and the return statement are not visible in this
/// listing.
73 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
74 unsigned NumberDeps = 0;
75 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
80 SUnit *PredSU = I->getSUnit();
81 const SDNode *ScegN = PredSU->getNode();
86 // If value is passed to CopyToReg, it is probably
// Opcode-based shortcuts: only CopyFromReg bumps the count here.
88 switch (ScegN->getOpcode()) {
90 case ISD::TokenFactor: break;
91 case ISD::CopyFromReg: NumberDeps++; break;
92 case ISD::CopyToReg: break;
93 case ISD::INLINEASM: break;
95 if (!ScegN->isMachineOpcode())
// Inspect every value produced by the predecessor node.
98 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
99 MVT VT = ScegN->getSimpleValueType(i);
// Only legal types whose register class matches RCId are of interest.
100 if (TLI->isTypeLegal(VT)
101 && (TLI->getRegClassFor(VT)->getID() == RCId)) {
/// Successor-side counterpart of numberRCValPredInSU: walks the successor
/// edges of \p SU and accumulates a dependency count (NumberDeps).
/// Visible rules: a CopyToReg successor always adds one; TokenFactor,
/// CopyFromReg and INLINEASM successors add nothing; for other successors
/// each operand of the node is checked for a legal type whose register class
/// ID equals RCId.
/// NOTE(review): the second parameter line, the loop condition, the bodies of
/// the two `if` statements and the return statement are not visible in this
/// listing.
110 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
112 unsigned NumberDeps = 0;
113 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
118 SUnit *SuccSU = I->getSUnit();
119 const SDNode *ScegN = SuccSU->getNode();
123 // If value is passed to CopyToReg, it is probably
// Opcode-based shortcuts: only CopyToReg bumps the count here.
125 switch (ScegN->getOpcode()) {
127 case ISD::TokenFactor: break;
128 case ISD::CopyFromReg: break;
129 case ISD::CopyToReg: NumberDeps++; break;
130 case ISD::INLINEASM: break;
132 if (!ScegN->isMachineOpcode())
// Inspect every operand consumed by the successor node; the value type is
// taken from the defining node at the operand's result number.
135 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
136 const SDValue &Op = ScegN->getOperand(i);
137 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
// Only legal types whose register class matches RCId are of interest.
138 if (TLI->isTypeLegal(VT)
139 && (TLI->getRegClassFor(VT)->getID() == RCId)) {
/// File-local helper that iterates over the successor edges of \p SU and
/// returns an unsigned count. scheduledNode() subtracts the result from
/// SU->Succs.size().
/// NOTE(review): presumably counts control-dependence successor edges; the
/// loop body and return are not visible in this listing, so confirm.
148 static unsigned numberCtrlDepsInSU(SUnit *SU) {
149 unsigned NumberDeps = 0;
150 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
/// File-local helper that iterates over the predecessor edges of \p SU and
/// returns an unsigned count. scheduledNode() subtracts the result from
/// SU->Preds.size().
/// NOTE(review): presumably counts control-dependence predecessor edges; the
/// loop body and return are not visible in this listing, so confirm.
158 static unsigned numberCtrlPredInSU(SUnit *SU) {
159 unsigned NumberDeps = 0;
160 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
169 /// Initialize nodes.
/// Sizes NumNodesSolelyBlocking to one zero-initialised entry per scheduling
/// unit and calls initNumRegDefsLeft() on every unit.
/// NOTE(review): the statement that stores \p sunits into the SUnits member is
/// not visible in this listing (embedded line 172 is missing).
171 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
173 NumNodesSolelyBlocking.resize(SUnits->size(), 0);
// Seed NumRegDefsLeft for each unit.
175 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
176 SUnit *SU = &(*SUnits)[i];
177 initNumRegDefsLeft(SU);
182 /// This heuristic is used if DFA scheduling is not desired
183 /// for some VLIW platform.
/// Comparison of two scheduling units, applied in this order:
///   1. the isScheduleHigh flag (when only one side has it set),
///   2. PQ->getLatency() of each node,
///   3. PQ->getNumSolelyBlockNodes() of each node,
///   4. the node number.
/// For steps 2-4 the result is true when \p LHS has the smaller value.
/// NOTE(review): the return values for step 1 are not visible in this listing.
184 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
185 // The isScheduleHigh flag allows nodes with wraparound dependencies that
186 // cannot easily be modeled as edges with latencies to be scheduled as
187 // soon as possible in a top-down schedule.
188 if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
191 if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
194 unsigned LHSNum = LHS->NodeNum;
195 unsigned RHSNum = RHS->NodeNum;
197 // The most important heuristic is scheduling the critical path.
198 unsigned LHSLatency = PQ->getLatency(LHSNum);
199 unsigned RHSLatency = PQ->getLatency(RHSNum);
200 if (LHSLatency < RHSLatency) return true;
201 if (LHSLatency > RHSLatency) return false;
203 // After that, if two nodes have identical latencies, look to see if one will
204 // unblock more other nodes than the other.
205 unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
206 unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
207 if (LHSBlocked < RHSBlocked) return true;
208 if (LHSBlocked > RHSBlocked) return false;
210 // Finally, just to provide a stable ordering, use the node number as a
212 return LHSNum < RHSNum;
216 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
217 /// of SU, return it, otherwise return null.
/// The same predecessor reached through several edges still counts as one:
/// the scan only gives up when a *different* unscheduled predecessor shows up.
218 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
219 SUnit *OnlyAvailablePred = nullptr;
220 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
222 SUnit &Pred = *I->getSUnit();
223 if (!Pred.isScheduled) {
224 // We found an available, but not scheduled, predecessor. If it's the
225 // only one we have found, keep track of it... otherwise give up.
226 if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
228 OnlyAvailablePred = &Pred;
// Null when no unscheduled predecessor was seen.
231 return OnlyAvailablePred;
/// Computes, for \p SU, how many of its successors have \p SU as their single
/// unscheduled predecessor and records that number in
/// NumNodesSolelyBlocking[SU->NodeNum].
/// NOTE(review): the counter increment and the insertion of \p SU into the
/// queue are not visible in this listing; adjustPriorityOfUnscheduledPreds()
/// relies on push() to reinsert a node.
234 void ResourcePriorityQueue::push(SUnit *SU) {
235 // Look at all of the successors of this node. Count the number of nodes that
236 // this node is the sole unscheduled node for.
237 unsigned NumNodesBlocking = 0;
238 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
240 if (getSingleUnscheduledPred(I->getSUnit()) == SU)
243 NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
247 /// Check if scheduling of this SU is possible
248 /// in the current packet.
/// Checks performed, in order: null \p SU / null node, glued node, whether
/// ResourcesModel can reserve resources for the machine opcode, and whether
/// \p SU is a direct successor of any unit already in Packet.
/// NOTE(review): the return statement belonging to each check is not visible
/// in this listing.
249 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
250 if (!SU || !SU->getNode())
253 // If this is a compound instruction,
254 // it is likely to be a call. Do not delay it.
255 if (SU->getNode()->getGluedNode())
258 // First see if the pipeline could receive this instruction
259 // in the current cycle.
260 if (SU->getNode()->isMachineOpcode())
261 switch (SU->getNode()->getMachineOpcode()) {
// Ask the resource model whether this opcode fits in the current state.
263 if (!ResourcesModel->canReserveResources(&TII->get(
264 SU->getNode()->getMachineOpcode())))
// These target-independent opcodes are listed as separate cases from the
// resource query above.
266 case TargetOpcode::EXTRACT_SUBREG:
267 case TargetOpcode::INSERT_SUBREG:
268 case TargetOpcode::SUBREG_TO_REG:
269 case TargetOpcode::REG_SEQUENCE:
270 case TargetOpcode::IMPLICIT_DEF:
274 // Now see if there are no other dependencies
275 // to instructions already in the packet.
276 for (unsigned i = 0, e = Packet.size(); i != e; ++i)
277 for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
278 E = Packet[i]->Succs.end(); I != E; ++I) {
279 // Since we do not add pseudos to packets, might as well
// SU depends on something that is already part of the current packet.
284 if (I->getSUnit() == SU)
291 /// Keep track of available resources.
/// Clears ResourcesModel when \p SU does not fit (or has a glued node), then
/// reserves resources for machine opcodes and appends \p SU to Packet. The
/// model is cleared again once Packet holds at least
/// InstrItins->SchedModel.IssueWidth entries.
/// NOTE(review): several lines (e.g. any Packet.clear() calls and the `else`
/// branch header before the pseudo-op handling) are not visible in this
/// listing.
292 void ResourcePriorityQueue::reserveResources(SUnit *SU) {
293 // If this SU does not fit in the packet
295 if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
296 ResourcesModel->clearResources();
300 if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
301 switch (SU->getNode()->getMachineOpcode()) {
// Reserve the resources this opcode needs in the resource model.
303 ResourcesModel->reserveResources(&TII->get(
304 SU->getNode()->getMachineOpcode()));
// Same opcode list that isResourceAvailable() treats separately.
306 case TargetOpcode::EXTRACT_SUBREG:
307 case TargetOpcode::INSERT_SUBREG:
308 case TargetOpcode::SUBREG_TO_REG:
309 case TargetOpcode::REG_SEQUENCE:
310 case TargetOpcode::IMPLICIT_DEF:
313 Packet.push_back(SU);
315 // Forcefully end packet for PseudoOps.
317 ResourcesModel->clearResources();
321 // If packet is now full, reset the state so in the next cycle
323 if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
324 ResourcesModel->clearResources();
/// Computes a signed balance for register class \p RCId around \p SU:
/// for every value the node defines with a legal type in that class it adds
/// numberRCValSuccInSU(SU, RCId); for every operand with a legal type in that
/// class it subtracts numberRCValPredInSU(SU, RCId).
/// NOTE(review): the early-return value, the action taken for constant
/// operands and the final return are not visible in this listing.
/// NOTE(review): both helper calls take only (SU, RCId), so their results do
/// not depend on the loop index; they could be computed once per call.
329 signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
330 signed RegBalance = 0;
// Only machine-opcode nodes are considered.
332 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
// Values defined by this node.
336 for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
337 MVT VT = SU->getNode()->getSimpleValueType(i);
338 if (TLI->isTypeLegal(VT)
339 && TLI->getRegClassFor(VT)
340 && TLI->getRegClassFor(VT)->getID() == RCId)
341 RegBalance += numberRCValSuccInSU(SU, RCId);
// Values consumed by this node.
344 for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
345 const SDValue &Op = SU->getNode()->getOperand(i);
346 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
// Constant operands are special-cased (handling not visible here).
347 if (isa<ConstantSDNode>(Op.getNode()))
350 if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
351 && TLI->getRegClassFor(VT)->getID() == RCId)
352 RegBalance -= numberRCValPredInSU(SU, RCId);
357 /// Estimates change in reg pressure from this SU.
358 /// It is achieved by trivial tracking of defined
359 /// and used vregs in dependent instructions.
360 /// The RawPressure flag makes this function ignore
361 /// existing reg file sizes, and report raw def/use
/// Two accumulation loops are visible: the first sums rawRegPressureDelta()
/// over every register class; the second adds a class's delta only when
/// RegPressure plus the delta is positive and reaches RegLimit for that class.
/// NOTE(review): the branch on \p RawPressure that selects between the loops
/// and the return statements are not visible in this listing; presumably the
/// first loop is the RawPressure path.
363 signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
364 signed RegBalance = 0;
// Only machine-opcode nodes are considered.
366 if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
// Unconditional sum over all register classes.
370 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
371 E = TRI->regclass_end(); I != E; ++I) {
372 const TargetRegisterClass *RC = *I;
373 RegBalance += rawRegPressureDelta(SU, RC->getID());
// Limit-aware sum: a class contributes only if scheduling SU would leave its
// tracked pressure positive and at or above the class limit.
// NOTE(review): rawRegPressureDelta(SU, RC->getID()) is evaluated up to three
// times per class here.
377 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
378 E = TRI->regclass_end(); I != E; ++I) {
379 const TargetRegisterClass *RC = *I;
380 if ((RegPressure[RC->getID()] +
381 rawRegPressureDelta(SU, RC->getID()) > 0) &&
382 (RegPressure[RC->getID()] +
383 rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
384 RegBalance += rawRegPressureDelta(SU, RC->getID());
391 // Constants used to denote relative importance of
392 // heuristic components for cost computation.
// All of them are consumed by SUSchedulingCost().
// Priority*: flat bonuses added to the cost.
393 static const unsigned PriorityOne = 200;
394 static const unsigned PriorityTwo = 50;
395 static const unsigned PriorityThree = 15;
396 static const unsigned PriorityFour = 5;
// Scale*: multipliers applied to heights, blocked-node counts, value counts
// and register-pressure deltas.
397 static const unsigned ScaleOne = 20;
398 static const unsigned ScaleTwo = 10;
399 static const unsigned ScaleThree = 5;
// FactorOne: left-shift amount applied when resources are available
// (a shift by 2, i.e. a multiplication by four).
400 static const unsigned FactorOne = 2;
402 /// Returns single number reflecting benefit of scheduling SU
403 /// in the current cycle.
/// A larger value is better: pop() selects the queue entry with the highest
/// cost. The cost is accumulated in ResCount from the isScheduleHigh flag,
/// the unit's height, the number of nodes it solely blocks, resource
/// availability, the estimated register-pressure change, and per-node opcode
/// bonuses.
/// NOTE(review): the declaration of ResCount, the already-scheduled early
/// exit, the `else` header between the two heuristics, several conditions in
/// the final loop and the return statement are not visible in this listing.
404 signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
405 // Initial trivial priority.
408 // Do not waste time on a node that is already scheduled.
412 // Forced priority is high.
413 if (SU->isScheduleHigh)
414 ResCount += PriorityOne;
416 // Adaptable scheduling
417 // A small, but very parallel
418 // region, where reg pressure is an issue.
419 if (HorizontalVerticalBalance > RegPressureThreshold) {
420 // Critical path first
421 ResCount += (SU->getHeight() * ScaleTwo);
422 // If resources are available for it, multiply the
423 // chance of scheduling.
424 if (isResourceAvailable(SU))
425 ResCount <<= FactorOne;
427 // Consider change to reg pressure from scheduling
// Raw (limit-agnostic) pressure delta, weighted by ScaleOne.
429 ResCount -= (regPressureDelta(SU,true) * ScaleOne);
431 // Default heuristic, greedy and
432 // critical path driven.
434 // Critical path first.
435 ResCount += (SU->getHeight() * ScaleTwo);
436 // Now see how many instructions are blocked by this SU.
437 ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
438 // If resources are available for it, multiply the
439 // chance of scheduling.
440 if (isResourceAvailable(SU))
441 ResCount <<= FactorOne;
// Limit-aware pressure delta (RawPressure left at its default), weighted by
// ScaleTwo.
443 ResCount -= (regPressureDelta(SU) * ScaleTwo);
446 // These are platform-specific things.
447 // Will need to go into the back end
448 // and accessed from here via a hook.
// Walk SU's node and every node glued to it, adding opcode-specific bonuses.
449 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
450 if (N->isMachineOpcode()) {
451 const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
453 ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
456 switch (N->getOpcode()) {
458 case ISD::TokenFactor:
459 case ISD::CopyFromReg:
461 ResCount += PriorityFour;
465 ResCount += PriorityThree;
473 /// Main resource tracking point.
/// Called with the unit that was just scheduled. Visible steps:
///   - update RegPressure for the classes of the values the node defines and
///     the operands it consumes (machine-opcode nodes only),
///   - decrement NumRegDefsLeft on non-control predecessors that still have
///     defs left,
///   - reserve resources for \p SU in the current packet,
///   - refresh the priority of successors' unscheduled predecessors,
///   - update ParallelLiveRanges and HorizontalVerticalBalance.
/// NOTE(review): the null-SU handling around the first clearResources() call
/// and several loop/branch headers are not visible in this listing.
474 void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
475 // Use NULL entry as an event marker to reset
478 ResourcesModel->clearResources();
483 const SDNode *ScegN = SU->getNode();
484 // Update reg pressure tracking.
485 // First update current node.
486 if (ScegN->isMachineOpcode()) {
487 // Estimate generated regs.
488 for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
489 MVT VT = ScegN->getSimpleValueType(i);
491 if (TLI->isTypeLegal(VT)) {
492 const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
494 RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
497 // Estimate killed regs.
498 for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
499 const SDValue &Op = ScegN->getOperand(i);
500 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
502 if (TLI->isTypeLegal(VT)) {
503 const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
// Subtract the killed count, clamping the tracked pressure at zero.
505 if (RegPressure[RC->getID()] >
506 (numberRCValPredInSU(SU, RC->getID())))
507 RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
508 else RegPressure[RC->getID()] = 0;
// One fewer outstanding def on each data predecessor that still has some.
512 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
514 if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
516 --I->getSUnit()->NumRegDefsLeft;
520 // Reserve resources for this SU.
521 reserveResources(SU);
523 // Adjust number of parallel live ranges.
524 // Heuristic is simple - node with no data successors reduces
525 // number of live ranges. All others, increase it.
526 unsigned NumberNonControlDeps = 0;
528 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
530 adjustPriorityOfUnscheduledPreds(I->getSUnit());
532 NumberNonControlDeps++;
// No data successors: shrink ParallelLiveRanges by NumPreds, not below zero.
535 if (!NumberNonControlDeps) {
536 if (ParallelLiveRanges >= SU->NumPreds)
537 ParallelLiveRanges -= SU->NumPreds;
539 ParallelLiveRanges = 0;
543 ParallelLiveRanges += SU->NumRegDefsLeft;
545 // Track parallel live chains.
// Net change: (successor edges minus numberCtrlDepsInSU) added,
// (predecessor edges minus numberCtrlPredInSU) removed.
546 HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
547 HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
/// Computes the initial SU->NumRegDefsLeft by walking \p SU's node and every
/// node glued to it. For machine opcodes the count is taken as the smaller of
/// the node's value count and the instruction descriptor's def count;
/// IMPLICIT_DEF and some generic opcodes are special-cased.
/// NOTE(review): the bodies of the IMPLICIT_DEF branch and of the generic
/// opcode switch are not visible in this listing.
550 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
551 unsigned NodeNumDefs = 0;
552 for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
553 if (N->isMachineOpcode()) {
554 const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
555 // No register need be allocated for this.
556 if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Never count more defs than the node has values or the descriptor declares.
560 NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
563 switch(N->getOpcode()) {
565 case ISD::CopyFromReg:
573 SU->NumRegDefsLeft = NodeNumDefs;
576 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
577 /// scheduled. If SU is not itself available, then there is at least one
578 /// predecessor node that has not been scheduled yet. If SU has exactly ONE
579 /// unscheduled predecessor, we want to increase its priority: it getting
580 /// scheduled will make this node available, so it is better than some other
581 /// node of the same priority that will not make a node available.
/// Implemented as remove() followed by push() on that predecessor, so that
/// push() recomputes its NumNodesSolelyBlocking entry.
582 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
583 if (SU->isAvailable) return; // All preds scheduled.
// Nothing to do unless there is exactly one unscheduled predecessor and it is
// itself available.
585 SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
586 if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
589 // Okay, we found a single predecessor that is available, but not scheduled.
590 // Since it is available, it must be in the priority queue. First remove it.
591 remove(OnlyAvailablePred);
593 // Reinsert the node into the priority queue, which recomputes its
594 // NumNodesSolelyBlocking value.
595 push(OnlyAvailablePred);
599 /// Main access point - returns next instructions
600 /// to be placed in scheduling sequence.
/// Selects one entry of Queue and swaps it to the back of the vector.
/// With DFA scheduling enabled the entry with the highest SUSchedulingCost()
/// wins (the earliest such entry on ties, since the comparison is strict);
/// otherwise the Picker comparison decides.
/// NOTE(review): the empty-queue check, the `Best = I` updates, the `else`
/// header and the final pop_back/return are not visible in this listing.
601 SUnit *ResourcePriorityQueue::pop() {
605 std::vector<SUnit *>::iterator Best = Queue.begin();
606 if (!DisableDFASched) {
607 signed BestCost = SUSchedulingCost(*Best);
608 for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
609 E = Queue.end(); I != E; ++I) {
// NOTE(review): SUSchedulingCost(*I) is evaluated twice whenever a new best
// is found; the value could be cached in a local.
611 if (SUSchedulingCost(*I) > BestCost) {
612 BestCost = SUSchedulingCost(*I);
617 // Use default TD scheduling mechanism.
619 for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
620 E = Queue.end(); I != E; ++I)
621 if (Picker(*Best, *I))
// Move the chosen entry to the back of the vector unless it is already there.
626 if (Best != std::prev(Queue.end()))
627 std::swap(*Best, Queue.back());
/// Locates \p SU in Queue with a linear search and swaps it to the back of
/// the vector (unless it is already the last element).
/// Asserts that the queue is not empty.
/// NOTE(review): the result of std::find is not compared against Queue.end()
/// in the visible lines before being dereferenced; if \p SU can be absent
/// from the queue this would dereference the end iterator. Confirm that
/// callers guarantee membership.
/// NOTE(review): the statement that actually drops the last element is not
/// visible in this listing.
635 void ResourcePriorityQueue::remove(SUnit *SU) {
636 assert(!Queue.empty() && "Queue is empty!");
637 std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
638 if (I != std::prev(Queue.end()))
639 std::swap(*I, Queue.back());
/// No-op variant of dump(): the body is empty and \p DAG is unused.
/// NOTE(review): a second definition of dump() follows; presumably the two
/// are selected by a preprocessor conditional that is not visible in this
/// listing - confirm.
646 void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
648 void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
649 ResourcePriorityQueue q = *this;
652 dbgs() << "Height " << su->getHeight() << ": ";