lib/CodeGen/MachineScheduler.cpp

   1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // MachineScheduler schedules machine instructions after phi elimination. It
  11 // preserves LiveIntervals so it can be invoked before register allocation.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "misched"
  16
  17 #include "llvm/CodeGen/MachineScheduler.h"
  18 #include "llvm/ADT/OwningPtr.h"
  19 #include "llvm/ADT/PriorityQueue.h"
  20 #include "llvm/Analysis/AliasAnalysis.h"
  21 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  22 #include "llvm/CodeGen/Passes.h"
  23 #include "llvm/CodeGen/RegisterClassInfo.h"
  24 #include "llvm/CodeGen/ScheduleDFS.h"
  25 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  26 #include "llvm/Support/CommandLine.h"
  27 #include "llvm/Support/Debug.h"
  28 #include "llvm/Support/ErrorHandling.h"
  29 #include "llvm/Support/GraphWriter.h"
  30 #include "llvm/Support/raw_ostream.h"
  31 #include <queue>
  32
  33 using namespace llvm;
  34
  35 namespace llvm {
  36 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
  37                            cl::desc("Force top-down list scheduling"));
  38 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
  39                             cl::desc("Force bottom-up list scheduling"));
  40 }
  41
  42 #ifndef NDEBUG
  43 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  44   cl::desc("Pop up a window to show MISched dags after they are processed"));
  45
  46 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  47   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
  48 #else
  49 static bool ViewMISchedDAGs = false;
  50 #endif // NDEBUG
  51
  52 // Experimental heuristics
  53 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  54   cl::desc("Enable load clustering."), cl::init(true));
  55
  56 // Experimental heuristics
  57 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  58   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
  59
  60 // DAG subtrees must have at least this many nodes.
  61 static const unsigned MinSubtreeSize = 8;
  62
  63 //===----------------------------------------------------------------------===//
  64 // Machine Instruction Scheduling Pass and Registry
  65 //===----------------------------------------------------------------------===//
  66
  67 MachineSchedContext::MachineSchedContext():
  68     MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  69   RegClassInfo = new RegisterClassInfo();
  70 }
  71
  72 MachineSchedContext::~MachineSchedContext() {
  73   delete RegClassInfo;
  74 }
  75
  76 namespace {
  77 /// MachineScheduler runs after coalescing and before register allocation.
  78 class MachineScheduler : public MachineSchedContext,
  79                          public MachineFunctionPass {
  80 public:
  81   MachineScheduler();
  82
  83   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
  84
  85   virtual void releaseMemory() {}
  86
  87   virtual bool runOnMachineFunction(MachineFunction&);
  88
  89   virtual void print(raw_ostream &O, const Module* = 0) const;
  90
  91   static char ID; // Class identification, replacement for typeinfo
  92 };
  93 } // namespace
  94
  95 char MachineScheduler::ID = 0;
  96
  97 char &llvm::MachineSchedulerID = MachineScheduler::ID;
  98
  99 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
 100                       "Machine Instruction Scheduler", false, false)
 101 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 102 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 103 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 104 INITIALIZE_PASS_END(MachineScheduler, "misched",
 105                     "Machine Instruction Scheduler", false, false)
 106
 107 MachineScheduler::MachineScheduler()
 108 : MachineFunctionPass(ID) {
 109   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
 110 }
 111
 112 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
 113   AU.setPreservesCFG();
 114   AU.addRequiredID(MachineDominatorsID);
 115   AU.addRequired<MachineLoopInfo>();
 116   AU.addRequired<AliasAnalysis>();
 117   AU.addRequired<TargetPassConfig>();
 118   AU.addRequired<SlotIndexes>();
 119   AU.addPreserved<SlotIndexes>();
 120   AU.addRequired<LiveIntervals>();
 121   AU.addPreserved<LiveIntervals>();
 122   MachineFunctionPass::getAnalysisUsage(AU);
 123 }
 124
 125 MachinePassRegistry MachineSchedRegistry::Registry;
 126
 127 /// A dummy default scheduler factory indicates whether the scheduler
 128 /// is overridden on the command line.
 129 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
 130   return 0;
 131 }
 132
 133 /// MachineSchedOpt allows command line selection of the scheduler.
 134 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
 135                RegisterPassParser<MachineSchedRegistry> >
 136 MachineSchedOpt("misched",
 137                 cl::init(&useDefaultMachineSched), cl::Hidden,
 138                 cl::desc("Machine instruction scheduler to use"));
 139
 140 static MachineSchedRegistry
 141 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
 142                      useDefaultMachineSched);
 143
 144 /// Forward declare the standard machine scheduler. This will be used as the
 145 /// default scheduler if the target does not set a default.
 146 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
 147
 148
 149 /// Decrement this iterator until reaching the top or a non-debug instr.
 150 static MachineBasicBlock::iterator
 151 priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
 152   assert(I != Beg && "reached the top of the region, cannot decrement");
 153   while (--I != Beg) {
 154     if (!I->isDebugValue())
 155       break;
 156   }
 157   return I;
 158 }
 159
 160 /// If this iterator is a debug value, increment until reaching the End or a
 161 /// non-debug instruction.
 162 static MachineBasicBlock::iterator
 163 nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
 164   for(; I != End; ++I) {
 165     if (!I->isDebugValue())
 166       break;
 167   }
 168   return I;
 169 }
 170
 171 /// Top-level MachineScheduler pass driver.
 172 ///
 173 /// Visit blocks in function order. Divide each block into scheduling regions
 174 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
 175 /// consistent with the DAG builder, which traverses the interior of the
 176 /// scheduling regions bottom-up.
 177 ///
 178 /// This design avoids exposing scheduling boundaries to the DAG builder,
 179 /// simplifying the DAG builder's support for "special" target instructions.
 180 /// At the same time the design allows target schedulers to operate across
 181 /// scheduling boundaries, for example to bundle the boudary instructions
 182 /// without reordering them. This creates complexity, because the target
 183 /// scheduler must update the RegionBegin and RegionEnd positions cached by
 184 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
 185 /// design would be to split blocks at scheduling boundaries, but LLVM has a
 186 /// general bias against block splitting purely for implementation simplicity.
 187 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 188   DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
 189
 190   // Initialize the context of the pass.
 191   MF = &mf;
 192   MLI = &getAnalysis<MachineLoopInfo>();
 193   MDT = &getAnalysis<MachineDominatorTree>();
 194   PassConfig = &getAnalysis<TargetPassConfig>();
 195   AA = &getAnalysis<AliasAnalysis>();
 196
 197   LIS = &getAnalysis<LiveIntervals>();
 198   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 199
 200   RegClassInfo->runOnMachineFunction(*MF);
 201
 202   // Select the scheduler, or set the default.
 203   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
 204   if (Ctor == useDefaultMachineSched) {
 205     // Get the default scheduler set by the target.
 206     Ctor = MachineSchedRegistry::getDefault();
 207     if (!Ctor) {
 208       Ctor = createConvergingSched;
 209       MachineSchedRegistry::setDefault(Ctor);
 210     }
 211   }
 212   // Instantiate the selected scheduler.
 213   OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
 214
 215   // Visit all machine basic blocks.
 216   //
 217   // TODO: Visit blocks in global postorder or postorder within the bottom-up
 218   // loop tree. Then we can optionally compute global RegPressure.
 219   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
 220        MBB != MBBEnd; ++MBB) {
 221
 222     Scheduler->startBlock(MBB);
 223
 224     // Break the block into scheduling regions [I, RegionEnd), and schedule each
 225     // region as soon as it is discovered. RegionEnd points the scheduling
 226     // boundary at the bottom of the region. The DAG does not include RegionEnd,
 227     // but the region does (i.e. the next RegionEnd is above the previous
 228     // RegionBegin). If the current block has no terminator then RegionEnd ==
 229     // MBB->end() for the bottom region.
 230     //
 231     // The Scheduler may insert instructions during either schedule() or
 232     // exitRegion(), even for empty regions. So the local iterators 'I' and
 233     // 'RegionEnd' are invalid across these calls.
 234     unsigned RemainingInstrs = MBB->size();
 235     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
 236         RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
 237
 238       // Avoid decrementing RegionEnd for blocks with no terminator.
 239       if (RegionEnd != MBB->end()
 240           || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
 241         --RegionEnd;
 242         // Count the boundary instruction.
 243         --RemainingInstrs;
 244       }
 245
 246       // The next region starts above the previous region. Look backward in the
 247       // instruction stream until we find the nearest boundary.
 248       MachineBasicBlock::iterator I = RegionEnd;
 249       for(;I != MBB->begin(); --I, --RemainingInstrs) {
 250         if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
 251           break;
 252       }
 253       // Notify the scheduler of the region, even if we may skip scheduling
 254       // it. Perhaps it still needs to be bundled.
 255       Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
 256
 257       // Skip empty scheduling regions (0 or 1 schedulable instructions).
 258       if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
 259         // Close the current region. Bundle the terminator if needed.
 260         // This invalidates 'RegionEnd' and 'I'.
 261         Scheduler->exitRegion();
 262         continue;
 263       }
 264       DEBUG(dbgs() << "********** MI Scheduling **********\n");
 265       DEBUG(dbgs() << MF->getName()
 266             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
 267             << "\n  From: " << *I << "    To: ";
 268             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
 269             else dbgs() << "End";
 270             dbgs() << " Remaining: " << RemainingInstrs << "\n");
 271
 272       // Schedule a region: possibly reorder instructions.
 273       // This invalidates 'RegionEnd' and 'I'.
 274       Scheduler->schedule();
 275
 276       // Close the current region.
 277       Scheduler->exitRegion();
 278
 279       // Scheduling has invalidated the current iterator 'I'. Ask the
 280       // scheduler for the top of it's scheduled region.
 281       RegionEnd = Scheduler->begin();
 282     }
 283     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
 284     Scheduler->finishBlock();
 285   }
 286   Scheduler->finalizeSchedule();
 287   DEBUG(LIS->print(dbgs()));
 288   return true;
 289 }
 290
 291 void MachineScheduler::print(raw_ostream &O, const Module* m) const {
 292   // unimplemented
 293 }
 294
 295 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 296 void ReadyQueue::dump() {
 297   dbgs() << Name << ": ";
 298   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
 299     dbgs() << Queue[i]->NodeNum << " ";
 300   dbgs() << "\n";
 301 }
 302 #endif
 303
 304 //===----------------------------------------------------------------------===//
 305 // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
 306 // preservation.
 307 //===----------------------------------------------------------------------===//
 308
 309 ScheduleDAGMI::~ScheduleDAGMI() {
 310   delete DFSResult;
 311   DeleteContainerPointers(Mutations);
 312   delete SchedImpl;
 313 }
 314
 315 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
 316   if (SuccSU != &ExitSU) {
 317     // Do not use WillCreateCycle, it assumes SD scheduling.
 318     // If Pred is reachable from Succ, then the edge creates a cycle.
 319     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
 320       return false;
 321     Topo.AddPred(SuccSU, PredDep.getSUnit());
 322   }
 323   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
 324   // Return true regardless of whether a new edge needed to be inserted.
 325   return true;
 326 }
 327
 328 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 329 /// NumPredsLeft reaches zero, release the successor node.
 330 ///
 331 /// FIXME: Adjust SuccSU height based on MinLatency.
 332 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
 333   SUnit *SuccSU = SuccEdge->getSUnit();
 334
 335   if (SuccEdge->isWeak()) {
 336     --SuccSU->WeakPredsLeft;
 337     if (SuccEdge->isCluster())
 338       NextClusterSucc = SuccSU;
 339     return;
 340   }
 341 #ifndef NDEBUG
 342   if (SuccSU->NumPredsLeft == 0) {
 343     dbgs() << "*** Scheduling failed! ***\n";
 344     SuccSU->dump(this);
 345     dbgs() << " has been released too many times!\n";
 346     llvm_unreachable(0);
 347   }
 348 #endif
 349   --SuccSU->NumPredsLeft;
 350   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
 351     SchedImpl->releaseTopNode(SuccSU);
 352 }
 353
 354 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
 355 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
 356   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
 357        I != E; ++I) {
 358     releaseSucc(SU, &*I);
 359   }
 360 }
 361
 362 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
 363 /// NumSuccsLeft reaches zero, release the predecessor node.
 364 ///
 365 /// FIXME: Adjust PredSU height based on MinLatency.
 366 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
 367   SUnit *PredSU = PredEdge->getSUnit();
 368
 369   if (PredEdge->isWeak()) {
 370     --PredSU->WeakSuccsLeft;
 371     if (PredEdge->isCluster())
 372       NextClusterPred = PredSU;
 373     return;
 374   }
 375 #ifndef NDEBUG
 376   if (PredSU->NumSuccsLeft == 0) {
 377     dbgs() << "*** Scheduling failed! ***\n";
 378     PredSU->dump(this);
 379     dbgs() << " has been released too many times!\n";
 380     llvm_unreachable(0);
 381   }
 382 #endif
 383   --PredSU->NumSuccsLeft;
 384   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
 385     SchedImpl->releaseBottomNode(PredSU);
 386 }
 387
 388 /// releasePredecessors - Call releasePred on each of SU's predecessors.
 389 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
 390   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
 391        I != E; ++I) {
 392     releasePred(SU, &*I);
 393   }
 394 }
 395
 396 void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
 397                                     MachineBasicBlock::iterator InsertPos) {
 398   // Advance RegionBegin if the first instruction moves down.
 399   if (&*RegionBegin == MI)
 400     ++RegionBegin;
 401
 402   // Update the instruction stream.
 403   BB->splice(InsertPos, BB, MI);
 404
 405   // Update LiveIntervals
 406   LIS->handleMove(MI, /*UpdateFlags=*/true);
 407
 408   // Recede RegionBegin if an instruction moves above the first.
 409   if (RegionBegin == InsertPos)
 410     RegionBegin = MI;
 411 }
 412
 413 bool ScheduleDAGMI::checkSchedLimit() {
 414 #ifndef NDEBUG
 415   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
 416     CurrentTop = CurrentBottom;
 417     return false;
 418   }
 419   ++NumInstrsScheduled;
 420 #endif
 421   return true;
 422 }
 423
 424 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 425 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 426 /// the region, including the boundary itself and single-instruction regions
 427 /// that don't get scheduled.
 428 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
 429                                 MachineBasicBlock::iterator begin,
 430                                 MachineBasicBlock::iterator end,
 431                                 unsigned endcount)
 432 {
 433   ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
 434
 435   // For convenience remember the end of the liveness region.
 436   LiveRegionEnd =
 437     (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
 438 }
 439
 440 // Setup the register pressure trackers for the top scheduled top and bottom
 441 // scheduled regions.
 442 void ScheduleDAGMI::initRegPressure() {
 443   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
 444   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
 445
 446   // Close the RPTracker to finalize live ins.
 447   RPTracker.closeRegion();
 448
 449   DEBUG(RPTracker.getPressure().dump(TRI));
 450
 451   // Initialize the live ins and live outs.
 452   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
 453   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
 454
 455   // Close one end of the tracker so we can call
 456   // getMaxUpward/DownwardPressureDelta before advancing across any
 457   // instructions. This converts currently live regs into live ins/outs.
 458   TopRPTracker.closeTop();
 459   BotRPTracker.closeBottom();
 460
 461   // Account for liveness generated by the region boundary.
 462   if (LiveRegionEnd != RegionEnd)
 463     BotRPTracker.recede();
 464
 465   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
 466
 467   // Cache the list of excess pressure sets in this region. This will also track
 468   // the max pressure in the scheduled code for these sets.
 469   RegionCriticalPSets.clear();
 470   const std::vector<unsigned> &RegionPressure =
 471     RPTracker.getPressure().MaxSetPressure;
 472   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
 473     unsigned Limit = TRI->getRegPressureSetLimit(i);
 474     DEBUG(dbgs() << TRI->getRegPressureSetName(i)
 475           << "Limit " << Limit
 476           << " Actual " << RegionPressure[i] << "\n");
 477     if (RegionPressure[i] > Limit)
 478       RegionCriticalPSets.push_back(PressureElement(i, 0));
 479   }
 480   DEBUG(dbgs() << "Excess PSets: ";
 481         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
 482           dbgs() << TRI->getRegPressureSetName(
 483             RegionCriticalPSets[i].PSetID) << " ";
 484         dbgs() << "\n");
 485 }
 486
 487 // FIXME: When the pressure tracker deals in pressure differences then we won't
 488 // iterate over all RegionCriticalPSets[i].
 489 void ScheduleDAGMI::
 490 updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
 491   for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
 492     unsigned ID = RegionCriticalPSets[i].PSetID;
 493     int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
 494     if ((int)NewMaxPressure[ID] > MaxUnits)
 495       MaxUnits = NewMaxPressure[ID];
 496   }
 497 }
 498
 499 /// schedule - Called back from MachineScheduler::runOnMachineFunction
 500 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
 501 /// only includes instructions that have DAG nodes, not scheduling boundaries.
 502 ///
 503 /// This is a skeletal driver, with all the functionality pushed into helpers,
 504 /// so that it can be easilly extended by experimental schedulers. Generally,
 505 /// implementing MachineSchedStrategy should be sufficient to implement a new
 506 /// scheduling algorithm. However, if a scheduler further subclasses
 507 /// ScheduleDAGMI then it will want to override this virtual method in order to
 508 /// update any specialized state.
 509 void ScheduleDAGMI::schedule() {
 510   buildDAGWithRegPressure();
 511
 512   Topo.InitDAGTopologicalSorting();
 513
 514   postprocessDAG();
 515
 516   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 517   findRootsAndBiasEdges(TopRoots, BotRoots);
 518
 519   // Initialize the strategy before modifying the DAG.
 520   // This may initialize a DFSResult to be used for queue priority.
 521   SchedImpl->initialize(this);
 522
 523   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 524           SUnits[su].dumpAll(this));
 525   if (ViewMISchedDAGs) viewGraph();
 526
 527   // Initialize ready queues now that the DAG and priority data are finalized.
 528   initQueues(TopRoots, BotRoots);
 529
 530   bool IsTopNode = false;
 531   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
 532     assert(!SU->isScheduled && "Node already scheduled");
 533     if (!checkSchedLimit())
 534       break;
 535
 536     scheduleMI(SU, IsTopNode);
 537
 538     updateQueues(SU, IsTopNode);
 539   }
 540   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 541
 542   placeDebugValues();
 543
 544   DEBUG({
 545       unsigned BBNum = begin()->getParent()->getNumber();
 546       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
 547       dumpSchedule();
 548       dbgs() << '\n';
 549     });
 550 }
 551
 552 /// Build the DAG and setup three register pressure trackers.
 553 void ScheduleDAGMI::buildDAGWithRegPressure() {
 554   // Initialize the register pressure tracker used by buildSchedGraph.
 555   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
 556
 557   // Account for liveness generate by the region boundary.
 558   if (LiveRegionEnd != RegionEnd)
 559     RPTracker.recede();
 560
 561   // Build the DAG, and compute current register pressure.
 562   buildSchedGraph(AA, &RPTracker);
 563
 564   // Initialize top/bottom trackers after computing region pressure.
 565   initRegPressure();
 566 }
 567
 568 /// Apply each ScheduleDAGMutation step in order.
 569 void ScheduleDAGMI::postprocessDAG() {
 570   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
 571     Mutations[i]->apply(this);
 572   }
 573 }
 574
 575 void ScheduleDAGMI::computeDFSResult() {
 576   if (!DFSResult)
 577     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
 578   DFSResult->clear();
 579   ScheduledTrees.clear();
 580   DFSResult->resize(SUnits.size());
 581   DFSResult->compute(SUnits);
 582   ScheduledTrees.resize(DFSResult->getNumSubtrees());
 583 }
 584
 585 void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
 586                                           SmallVectorImpl<SUnit*> &BotRoots) {
 587   for (std::vector<SUnit>::iterator
 588          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
 589     SUnit *SU = &(*I);
 590
 591     // Order predecessors so DFSResult follows the critical path.
 592     SU->biasCriticalPath();
 593
 594     // A SUnit is ready to top schedule if it has no predecessors.
 595     if (!I->NumPredsLeft && SU != &EntrySU)
 596       TopRoots.push_back(SU);
 597     // A SUnit is ready to bottom schedule if it has no successors.
 598     if (!I->NumSuccsLeft && SU != &ExitSU)
 599       BotRoots.push_back(SU);
 600   }
 601 }
 602
 603 /// Identify DAG roots and setup scheduler queues.
 604 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
 605                                ArrayRef<SUnit*> BotRoots) {
 606   NextClusterSucc = NULL;
 607   NextClusterPred = NULL;
 608
 609   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
 610   //
 611   // Nodes with unreleased weak edges can still be roots.
 612   // Release top roots in forward order.
 613   for (SmallVectorImpl<SUnit*>::const_iterator
 614          I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
 615     SchedImpl->releaseTopNode(*I);
 616   }
 617   // Release bottom roots in reverse order so the higher priority nodes appear
 618   // first. This is more natural and slightly more efficient.
 619   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
 620          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
 621     SchedImpl->releaseBottomNode(*I);
 622   }
 623
 624   releaseSuccessors(&EntrySU);
 625   releasePredecessors(&ExitSU);
 626
 627   SchedImpl->registerRoots();
 628
 629   // Advance past initial DebugValues.
 630   assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
 631   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
 632   TopRPTracker.setPos(CurrentTop);
 633
 634   CurrentBottom = RegionEnd;
 635 }
 636
 637 /// Move an instruction and update register pressure.
 638 void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
 639   // Move the instruction to its new location in the instruction stream.
 640   MachineInstr *MI = SU->getInstr();
 641
 642   if (IsTopNode) {
 643     assert(SU->isTopReady() && "node still has unscheduled dependencies");
 644     if (&*CurrentTop == MI)
 645       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
 646     else {
 647       moveInstruction(MI, CurrentTop);
 648       TopRPTracker.setPos(MI);
 649     }
 650
 651     // Update top scheduled pressure.
 652     TopRPTracker.advance();
 653     assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
 654     updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
 655   }
 656   else {
 657     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
 658     MachineBasicBlock::iterator priorII =
 659       priorNonDebug(CurrentBottom, CurrentTop);
 660     if (&*priorII == MI)
 661       CurrentBottom = priorII;
 662     else {
 663       if (&*CurrentTop == MI) {
 664         CurrentTop = nextIfDebug(++CurrentTop, priorII);
 665         TopRPTracker.setPos(CurrentTop);
 666       }
 667       moveInstruction(MI, CurrentBottom);
 668       CurrentBottom = MI;
 669     }
 670     // Update bottom scheduled pressure.
 671     BotRPTracker.recede();
 672     assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
 673     updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
 674   }
 675 }
 676
 677 /// Update scheduler queues after scheduling an instruction.
 678 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
 679   // Release dependent instructions for scheduling.
 680   if (IsTopNode)
 681     releaseSuccessors(SU);
 682   else
 683     releasePredecessors(SU);
 684
 685   SU->isScheduled = true;
 686
 687   if (DFSResult) {
 688     unsigned SubtreeID = DFSResult->getSubtreeID(SU);
 689     if (!ScheduledTrees.test(SubtreeID)) {
 690       ScheduledTrees.set(SubtreeID);
 691       DFSResult->scheduleTree(SubtreeID);
 692       SchedImpl->scheduleTree(SubtreeID);
 693     }
 694   }
 695
 696   // Notify the scheduling strategy after updating the DAG.
 697   SchedImpl->schedNode(SU, IsTopNode);
 698 }
 699
 700 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
 701 void ScheduleDAGMI::placeDebugValues() {
 702   // If first instruction was a DBG_VALUE then put it back.
 703   if (FirstDbgValue) {
 704     BB->splice(RegionBegin, BB, FirstDbgValue);
 705     RegionBegin = FirstDbgValue;
 706   }
 707
 708   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
 709          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
 710     std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
 711     MachineInstr *DbgValue = P.first;
 712     MachineBasicBlock::iterator OrigPrevMI = P.second;
 713     if (&*RegionBegin == DbgValue)
 714       ++RegionBegin;
 715     BB->splice(++OrigPrevMI, BB, DbgValue);
 716     if (OrigPrevMI == llvm::prior(RegionEnd))
 717       RegionEnd = DbgValue;
 718   }
 719   DbgValues.clear();
 720   FirstDbgValue = NULL;
 721 }
 722
 723 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 724 void ScheduleDAGMI::dumpSchedule() const {
 725   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
 726     if (SUnit *SU = getSUnit(&(*MI)))
 727       SU->dump(this);
 728     else
 729       dbgs() << "Missing SUnit\n";
 730   }
 731 }
 732 #endif
 733
 734 //===----------------------------------------------------------------------===//
 735 // LoadClusterMutation - DAG post-processing to cluster loads.
 736 //===----------------------------------------------------------------------===//
 737
 738 namespace {
 739 /// \brief Post-process the DAG to create cluster edges between neighboring
 740 /// loads.
 741 class LoadClusterMutation : public ScheduleDAGMutation {
 742   struct LoadInfo {
 743     SUnit *SU;
 744     unsigned BaseReg;
 745     unsigned Offset;
 746     LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
 747       : SU(su), BaseReg(reg), Offset(ofs) {}
 748   };
 749   static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
 750                            const LoadClusterMutation::LoadInfo &RHS);
 751
 752   const TargetInstrInfo *TII;
 753   const TargetRegisterInfo *TRI;
 754 public:
 755   LoadClusterMutation(const TargetInstrInfo *tii,
 756                       const TargetRegisterInfo *tri)
 757     : TII(tii), TRI(tri) {}
 758
 759   virtual void apply(ScheduleDAGMI *DAG);
 760 protected:
 761   void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
 762 };
 763 } // anonymous
 764
 765 bool LoadClusterMutation::LoadInfoLess(
 766   const LoadClusterMutation::LoadInfo &LHS,
 767   const LoadClusterMutation::LoadInfo &RHS) {
 768   if (LHS.BaseReg != RHS.BaseReg)
 769     return LHS.BaseReg < RHS.BaseReg;
 770   return LHS.Offset < RHS.Offset;
 771 }
 772
 773 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
 774                                                   ScheduleDAGMI *DAG) {
 775   SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
 776   for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
 777     SUnit *SU = Loads[Idx];
 778     unsigned BaseReg;
 779     unsigned Offset;
 780     if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
 781       LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
 782   }
 783   if (LoadRecords.size() < 2)
 784     return;
 785   std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
 786   unsigned ClusterLength = 1;
 787   for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
 788     if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
 789       ClusterLength = 1;
 790       continue;
 791     }
 792
 793     SUnit *SUa = LoadRecords[Idx].SU;
 794     SUnit *SUb = LoadRecords[Idx+1].SU;
 795     if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
 796         && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
 797
 798       DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
 799             << SUb->NodeNum << ")\n");
 800       // Copy successor edges from SUa to SUb. Interleaving computation
 801       // dependent on SUa can prevent load combining due to register reuse.
 802       // Predecessor edges do not need to be copied from SUb to SUa since nearby
 803       // loads should have effectively the same inputs.
 804       for (SUnit::const_succ_iterator
 805              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
 806         if (SI->getSUnit() == SUb)
 807           continue;
 808         DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
 809         DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
 810       }
 811       ++ClusterLength;
 812     }
 813     else
 814       ClusterLength = 1;
 815   }
 816 }
 817
 818 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
 819 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
 820   // Map DAG NodeNum to store chain ID.
 821   DenseMap<unsigned, unsigned> StoreChainIDs;
 822   // Map each store chain to a set of dependent loads.
 823   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
 824   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
 825     SUnit *SU = &DAG->SUnits[Idx];
 826     if (!SU->getInstr()->mayLoad())
 827       continue;
 828     unsigned ChainPredID = DAG->SUnits.size();
 829     for (SUnit::const_pred_iterator
 830            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
 831       if (PI->isCtrl()) {
 832         ChainPredID = PI->getSUnit()->NodeNum;
 833         break;
 834       }
 835     }
 836     // Check if this chain-like pred has been seen
 837     // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
 838     unsigned NumChains = StoreChainDependents.size();
 839     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
 840       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
 841     if (Result.second)
 842       StoreChainDependents.resize(NumChains + 1);
 843     StoreChainDependents[Result.first->second].push_back(SU);
 844   }
 845   // Iterate over the store chains.
 846   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
 847     clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
 848 }
 849
 850 //===----------------------------------------------------------------------===//
 851 // MacroFusion - DAG post-processing to encourage fusion of macro ops.
 852 //===----------------------------------------------------------------------===//
 853
 854 namespace {
 855 /// \brief Post-process the DAG to create cluster edges between instructions
 856 /// that may be fused by the processor into a single operation.
 857 class MacroFusion : public ScheduleDAGMutation {
 858   const TargetInstrInfo *TII;
 859 public:
 860   MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
 861
 862   virtual void apply(ScheduleDAGMI *DAG);
 863 };
 864 } // anonymous
 865
 866 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
 867 /// fused operations.
 868 void MacroFusion::apply(ScheduleDAGMI *DAG) {
 869   // For now, assume targets can only fuse with the branch.
 870   MachineInstr *Branch = DAG->ExitSU.getInstr();
 871   if (!Branch)
 872     return;
 873
 874   for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
 875     SUnit *SU = &DAG->SUnits[--Idx];
 876     if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
 877       continue;
 878
 879     // Create a single weak edge from SU to ExitSU. The only effect is to cause
 880     // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
 881     // need to copy predecessor edges from ExitSU to SU, since top-down
 882     // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
 883     // of SU, we could create an artificial edge from the deepest root, but it
 884     // hasn't been needed yet.
 885     bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
 886     (void)Success;
 887     assert(Success && "No DAG nodes should be reachable from ExitSU");
 888
 889     DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
 890     break;
 891   }
 892 }
 893
 894 //===----------------------------------------------------------------------===//
 895 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
 896 //===----------------------------------------------------------------------===//
 897
 898 namespace {
 899 /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
 900 /// the schedule.
 901 class ConvergingScheduler : public MachineSchedStrategy {
 902 public:
 903   /// Represent the type of SchedCandidate found within a single queue.
 904   /// pickNodeBidirectional depends on these listed by decreasing priority.
 905   enum CandReason {
 906     NoCand, SingleExcess, SingleCritical, Cluster,
 907     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
 908     TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
 909     NodeOrder};
 910
 911 #ifndef NDEBUG
 912   static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
 913 #endif
 914
 915   /// Policy for scheduling the next instruction in the candidate's zone.
 916   struct CandPolicy {
 917     bool ReduceLatency;
 918     unsigned ReduceResIdx;
 919     unsigned DemandResIdx;
 920
 921     CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
 922   };
 923
 924   /// Status of an instruction's critical resource consumption.
 925   struct SchedResourceDelta {
 926     // Count critical resources in the scheduled region required by SU.
 927     unsigned CritResources;
 928
 929     // Count critical resources from another region consumed by SU.
 930     unsigned DemandedResources;
 931
 932     SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
 933
 934     bool operator==(const SchedResourceDelta &RHS) const {
 935       return CritResources == RHS.CritResources
 936         && DemandedResources == RHS.DemandedResources;
 937     }
 938     bool operator!=(const SchedResourceDelta &RHS) const {
 939       return !operator==(RHS);
 940     }
 941   };
 942
 943   /// Store the state used by ConvergingScheduler heuristics, required for the
 944   /// lifetime of one invocation of pickNode().
 945   struct SchedCandidate {
 946     CandPolicy Policy;
 947
 948     // The best SUnit candidate.
 949     SUnit *SU;
 950
 951     // The reason for this candidate.
 952     CandReason Reason;
 953
 954     // Register pressure values for the best candidate.
 955     RegPressureDelta RPDelta;
 956
 957     // Critical resource consumption of the best candidate.
 958     SchedResourceDelta ResDelta;
 959
 960     SchedCandidate(const CandPolicy &policy)
 961     : Policy(policy), SU(NULL), Reason(NoCand) {}
 962
 963     bool isValid() const { return SU; }
 964
 965     // Copy the status of another candidate without changing policy.
 966     void setBest(SchedCandidate &Best) {
 967       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
 968       SU = Best.SU;
 969       Reason = Best.Reason;
 970       RPDelta = Best.RPDelta;
 971       ResDelta = Best.ResDelta;
 972     }
 973
 974     void initResourceDelta(const ScheduleDAGMI *DAG,
 975                            const TargetSchedModel *SchedModel);
 976   };
 977
 978   /// Summarize the unscheduled region.
 979   struct SchedRemainder {
 980     // Critical path through the DAG in expected latency.
 981     unsigned CriticalPath;
 982
 983     // Unscheduled resources
 984     SmallVector<unsigned, 16> RemainingCounts;
 985     // Critical resource for the unscheduled zone.
 986     unsigned CritResIdx;
 987     // Number of micro-ops left to schedule.
 988     unsigned RemainingMicroOps;
 989
 990     void reset() {
 991       CriticalPath = 0;
 992       RemainingCounts.clear();
 993       CritResIdx = 0;
 994       RemainingMicroOps = 0;
 995     }
 996
 997     SchedRemainder() { reset(); }
 998
 999     void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
1000
1001     unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
1002       if (!SchedModel->hasInstrSchedModel())
1003         return 0;
1004
1005       return std::max(
1006         RemainingMicroOps * SchedModel->getMicroOpFactor(),
1007         RemainingCounts[CritResIdx]);
1008     }
1009   };
1010
1011   /// Each Scheduling boundary is associated with ready queues. It tracks the
1012   /// current cycle in the direction of movement, and maintains the state
1013   /// of "hazards" and other interlocks at the current cycle.
1014   struct SchedBoundary {
1015     ScheduleDAGMI *DAG;
1016     const TargetSchedModel *SchedModel;
1017     SchedRemainder *Rem;
1018
1019     ReadyQueue Available;
1020     ReadyQueue Pending;
1021     bool CheckPending;
1022
1023     // For heuristics, keep a list of the nodes that immediately depend on the
1024     // most recently scheduled node.
1025     SmallPtrSet<const SUnit*, 8> NextSUs;
1026
1027     ScheduleHazardRecognizer *HazardRec;
1028
1029     unsigned CurrCycle;
1030     unsigned IssueCount;
1031
1032     /// MinReadyCycle - Cycle of the soonest available instruction.
1033     unsigned MinReadyCycle;
1034
1035     // The expected latency of the critical path in this scheduled zone.
1036     unsigned ExpectedLatency;
1037
1038     // Resources used in the scheduled zone beyond this boundary.
1039     SmallVector<unsigned, 16> ResourceCounts;
1040
1041     // Cache the critical resources ID in this scheduled zone.
1042     unsigned CritResIdx;
1043
1044     // Is the scheduled region resource limited vs. latency limited.
1045     bool IsResourceLimited;
1046
1047     unsigned ExpectedCount;
1048
1049 #ifndef NDEBUG
1050     // Remember the greatest min operand latency.
1051     unsigned MaxMinLatency;
1052 #endif
1053
1054     void reset() {
1055       Available.clear();
1056       Pending.clear();
1057       CheckPending = false;
1058       NextSUs.clear();
1059       HazardRec = 0;
1060       CurrCycle = 0;
1061       IssueCount = 0;
1062       MinReadyCycle = UINT_MAX;
1063       ExpectedLatency = 0;
1064       ResourceCounts.resize(1);
1065       assert(!ResourceCounts[0] && "nonzero count for bad resource");
1066       CritResIdx = 0;
1067       IsResourceLimited = false;
1068       ExpectedCount = 0;
1069 #ifndef NDEBUG
1070       MaxMinLatency = 0;
1071 #endif
1072       // Reserve a zero-count for invalid CritResIdx.
1073       ResourceCounts.resize(1);
1074     }
1075
1076     /// Pending queues extend the ready queues with the same ID and the
1077     /// PendingFlag set.
1078     SchedBoundary(unsigned ID, const Twine &Name):
1079       DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
1080       Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
1081       reset();
1082     }
1083
1084     ~SchedBoundary() { delete HazardRec; }
1085
1086     void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
1087               SchedRemainder *rem);
1088
1089     bool isTop() const {
1090       return Available.getID() == ConvergingScheduler::TopQID;
1091     }
1092
1093     unsigned getUnscheduledLatency(SUnit *SU) const {
1094       if (isTop())
1095         return SU->getHeight();
1096       return SU->getDepth() + SU->Latency;
1097     }
1098
1099     unsigned getCriticalCount() const {
1100       return ResourceCounts[CritResIdx];
1101     }
1102
1103     bool checkHazard(SUnit *SU);
1104
1105     void setLatencyPolicy(CandPolicy &Policy);
1106
1107     void releaseNode(SUnit *SU, unsigned ReadyCycle);
1108
1109     void bumpCycle();
1110
1111     void countResource(unsigned PIdx, unsigned Cycles);
1112
1113     void bumpNode(SUnit *SU);
1114
1115     void releasePending();
1116
1117     void removeReady(SUnit *SU);
1118
1119     SUnit *pickOnlyChoice();
1120   };
1121
1122 private:
1123   ScheduleDAGMI *DAG;
1124   const TargetSchedModel *SchedModel;
1125   const TargetRegisterInfo *TRI;
1126
1127   // State of the top and bottom scheduled instruction boundaries.
1128   SchedRemainder Rem;
1129   SchedBoundary Top;
1130   SchedBoundary Bot;
1131
1132 public:
1133   /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
1134   enum {
1135     TopQID = 1,
1136     BotQID = 2,
1137     LogMaxQID = 2
1138   };
1139
1140   ConvergingScheduler():
1141     DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
1142
1143   virtual void initialize(ScheduleDAGMI *dag);
1144
1145   virtual SUnit *pickNode(bool &IsTopNode);
1146
1147   virtual void schedNode(SUnit *SU, bool IsTopNode);
1148
1149   virtual void releaseTopNode(SUnit *SU);
1150
1151   virtual void releaseBottomNode(SUnit *SU);
1152
1153   virtual void registerRoots();
1154
1155 protected:
1156   void balanceZones(
1157     ConvergingScheduler::SchedBoundary &CriticalZone,
1158     ConvergingScheduler::SchedCandidate &CriticalCand,
1159     ConvergingScheduler::SchedBoundary &OppositeZone,
1160     ConvergingScheduler::SchedCandidate &OppositeCand);
1161
1162   void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
1163                            ConvergingScheduler::SchedCandidate &BotCand);
1164
1165   void tryCandidate(SchedCandidate &Cand,
1166                     SchedCandidate &TryCand,
1167                     SchedBoundary &Zone,
1168                     const RegPressureTracker &RPTracker,
1169                     RegPressureTracker &TempTracker);
1170
1171   SUnit *pickNodeBidirectional(bool &IsTopNode);
1172
1173   void pickNodeFromQueue(SchedBoundary &Zone,
1174                          const RegPressureTracker &RPTracker,
1175                          SchedCandidate &Candidate);
1176
1177 #ifndef NDEBUG
1178   void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
1179 #endif
1180 };
1181 } // namespace
1182
1183 void ConvergingScheduler::SchedRemainder::
1184 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1185   reset();
1186   if (!SchedModel->hasInstrSchedModel())
1187     return;
1188   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1189   for (std::vector<SUnit>::iterator
1190          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1191     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1192     RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
1193     for (TargetSchedModel::ProcResIter
1194            PI = SchedModel->getWriteProcResBegin(SC),
1195            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1196       unsigned PIdx = PI->ProcResourceIdx;
1197       unsigned Factor = SchedModel->getResourceFactor(PIdx);
1198       RemainingCounts[PIdx] += (Factor * PI->Cycles);
1199     }
1200   }
1201   for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
1202        PIdx != PEnd; ++PIdx) {
1203     if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
1204         >= (int)SchedModel->getLatencyFactor()) {
1205       CritResIdx = PIdx;
1206     }
1207   }
1208 }
1209
1210 void ConvergingScheduler::SchedBoundary::
1211 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1212   reset();
1213   DAG = dag;
1214   SchedModel = smodel;
1215   Rem = rem;
1216   if (SchedModel->hasInstrSchedModel())
1217     ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
1218 }
1219
1220 void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
1221   DAG = dag;
1222   SchedModel = DAG->getSchedModel();
1223   TRI = DAG->TRI;
1224   Rem.init(DAG, SchedModel);
1225   Top.init(DAG, SchedModel, &Rem);
1226   Bot.init(DAG, SchedModel, &Rem);
1227
1228   DAG->computeDFSResult();
1229
1230   // Initialize resource counts.
1231
1232   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
1233   // are disabled, then these HazardRecs will be disabled.
1234   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
1235   const TargetMachine &TM = DAG->MF.getTarget();
1236   Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1237   Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1238
1239   assert((!ForceTopDown || !ForceBottomUp) &&
1240          "-misched-topdown incompatible with -misched-bottomup");
1241 }
1242
1243 void ConvergingScheduler::releaseTopNode(SUnit *SU) {
1244   if (SU->isScheduled)
1245     return;
1246
1247   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1248        I != E; ++I) {
1249     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1250     unsigned MinLatency = I->getMinLatency();
1251 #ifndef NDEBUG
1252     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
1253 #endif
1254     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
1255       SU->TopReadyCycle = PredReadyCycle + MinLatency;
1256   }
1257   Top.releaseNode(SU, SU->TopReadyCycle);
1258 }
1259
1260 void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
1261   if (SU->isScheduled)
1262     return;
1263
1264   assert(SU->getInstr() && "Scheduled SUnit must have instr");
1265
1266   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1267        I != E; ++I) {
1268     if (I->isWeak())
1269       continue;
1270     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1271     unsigned MinLatency = I->getMinLatency();
1272 #ifndef NDEBUG
1273     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
1274 #endif
1275     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
1276       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
1277   }
1278   Bot.releaseNode(SU, SU->BotReadyCycle);
1279 }
1280
1281 void ConvergingScheduler::registerRoots() {
1282   Rem.CriticalPath = DAG->ExitSU.getDepth();
1283   // Some roots may not feed into ExitSU. Check all of them in case.
1284   for (std::vector<SUnit*>::const_iterator
1285          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1286     if ((*I)->getDepth() > Rem.CriticalPath)
1287       Rem.CriticalPath = (*I)->getDepth();
1288   }
1289   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1290 }
1291
1292 /// Does this SU have a hazard within the current instruction group.
1293 ///
1294 /// The scheduler supports two modes of hazard recognition. The first is the
1295 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1296 /// supports highly complicated in-order reservation tables
1297 /// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
1298 ///
1299 /// The second is a streamlined mechanism that checks for hazards based on
1300 /// simple counters that the scheduler itself maintains. It explicitly checks
1301 /// for instruction dispatch limitations, including the number of micro-ops that
1302 /// can dispatch per cycle.
1303 ///
1304 /// TODO: Also check whether the SU must start a new group.
1305 bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1306   if (HazardRec->isEnabled())
1307     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1308
1309   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1310   if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
1311     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1312           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1313     return true;
1314   }
1315   return false;
1316 }
1317
1318 /// Compute the remaining latency to determine whether ILP should be increased.
1319 void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
1320   // FIXME: compile time. In all, we visit four queues here one we should only
1321   // need to visit the one that was last popped if we cache the result.
1322   unsigned RemLatency = 0;
1323   for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
1324        I != E; ++I) {
1325     unsigned L = getUnscheduledLatency(*I);
1326     if (L > RemLatency)
1327       RemLatency = L;
1328   }
1329   for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
1330        I != E; ++I) {
1331     unsigned L = getUnscheduledLatency(*I);
1332     if (L > RemLatency)
1333       RemLatency = L;
1334   }
1335   unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
1336   if (RemLatency + ExpectedLatency >= CriticalPathLimit
1337       && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
1338     Policy.ReduceLatency = true;
1339     DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
1340   }
1341 }
1342
1343 void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
1344                                                      unsigned ReadyCycle) {
1345
1346   if (ReadyCycle < MinReadyCycle)
1347     MinReadyCycle = ReadyCycle;
1348
1349   // Check for interlocks first. For the purpose of other heuristics, an
1350   // instruction that cannot issue appears as if it's not in the ReadyQueue.
1351   if (ReadyCycle > CurrCycle || checkHazard(SU))
1352     Pending.push(SU);
1353   else
1354     Available.push(SU);
1355
1356   // Record this node as an immediate dependent of the scheduled node.
1357   NextSUs.insert(SU);
1358 }
1359
1360 /// Move the boundary of scheduled code by one cycle.
1361 void ConvergingScheduler::SchedBoundary::bumpCycle() {
1362   unsigned Width = SchedModel->getIssueWidth();
1363   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
1364
1365   unsigned NextCycle = CurrCycle + 1;
1366   assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1367   if (MinReadyCycle > NextCycle) {
1368     IssueCount = 0;
1369     NextCycle = MinReadyCycle;
1370   }
1371
1372   if (!HazardRec->isEnabled()) {
1373     // Bypass HazardRec virtual calls.
1374     CurrCycle = NextCycle;
1375   }
1376   else {
1377     // Bypass getHazardType calls in case of long latency.
1378     for (; CurrCycle != NextCycle; ++CurrCycle) {
1379       if (isTop())
1380         HazardRec->AdvanceCycle();
1381       else
1382         HazardRec->RecedeCycle();
1383     }
1384   }
1385   CheckPending = true;
1386   IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1387
1388   DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
1389         << CurrCycle << '\n');
1390 }
1391
1392 /// Add the given processor resource to this scheduled zone.
1393 void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
1394                                                        unsigned Cycles) {
1395   unsigned Factor = SchedModel->getResourceFactor(PIdx);
1396   DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
1397         << " +(" << Cycles << "x" << Factor
1398         << ") / " << SchedModel->getLatencyFactor() << '\n');
1399
1400   unsigned Count = Factor * Cycles;
1401   ResourceCounts[PIdx] += Count;
1402   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1403   Rem->RemainingCounts[PIdx] -= Count;
1404
1405   // Check if this resource exceeds the current critical resource by a full
1406   // cycle. If so, it becomes the critical resource.
1407   if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
1408       >= (int)SchedModel->getLatencyFactor()) {
1409     CritResIdx = PIdx;
1410     DEBUG(dbgs() << "  *** Critical resource "
1411           << SchedModel->getProcResource(PIdx)->Name << " x"
1412           << ResourceCounts[PIdx] << '\n');
1413   }
1414 }
1415
1416 /// Move the boundary of scheduled code by one SUnit.
1417 void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
1418   // Update the reservation table.
1419   if (HazardRec->isEnabled()) {
1420     if (!isTop() && SU->isCall) {
1421       // Calls are scheduled with their preceding instructions. For bottom-up
1422       // scheduling, clear the pipeline state before emitting.
1423       HazardRec->Reset();
1424     }
1425     HazardRec->EmitInstruction(SU);
1426   }
1427   // Update resource counts and critical resource.
1428   if (SchedModel->hasInstrSchedModel()) {
1429     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1430     Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
1431     for (TargetSchedModel::ProcResIter
1432            PI = SchedModel->getWriteProcResBegin(SC),
1433            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1434       countResource(PI->ProcResourceIdx, PI->Cycles);
1435     }
1436   }
1437   if (isTop()) {
1438     if (SU->getDepth() > ExpectedLatency)
1439       ExpectedLatency = SU->getDepth();
1440   }
1441   else {
1442     if (SU->getHeight() > ExpectedLatency)
1443       ExpectedLatency = SU->getHeight();
1444   }
1445
1446   IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1447
1448   // Check the instruction group dispatch limit.
1449   // TODO: Check if this SU must end a dispatch group.
1450   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
1451
1452   // checkHazard prevents scheduling multiple instructions per cycle that exceed
1453   // issue width. However, we commonly reach the maximum. In this case
1454   // opportunistically bump the cycle to avoid uselessly checking everything in
1455   // the readyQ. Furthermore, a single instruction may produce more than one
1456   // cycle's worth of micro-ops.
1457   if (IssueCount >= SchedModel->getIssueWidth()) {
1458     DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
1459     bumpCycle();
1460   }
1461 }
1462
1463 /// Release pending ready nodes in to the available queue. This makes them
1464 /// visible to heuristics.
1465 void ConvergingScheduler::SchedBoundary::releasePending() {
1466   // If the available queue is empty, it is safe to reset MinReadyCycle.
1467   if (Available.empty())
1468     MinReadyCycle = UINT_MAX;
1469
1470   // Check to see if any of the pending instructions are ready to issue.  If
1471   // so, add them to the available queue.
1472   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1473     SUnit *SU = *(Pending.begin()+i);
1474     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1475
1476     if (ReadyCycle < MinReadyCycle)
1477       MinReadyCycle = ReadyCycle;
1478
1479     if (ReadyCycle > CurrCycle)
1480       continue;
1481
1482     if (checkHazard(SU))
1483       continue;
1484
1485     Available.push(SU);
1486     Pending.remove(Pending.begin()+i);
1487     --i; --e;
1488   }
1489   DEBUG(if (!Pending.empty()) Pending.dump());
1490   CheckPending = false;
1491 }
1492
1493 /// Remove SU from the ready set for this boundary.
1494 void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
1495   if (Available.isInQueue(SU))
1496     Available.remove(Available.find(SU));
1497   else {
1498     assert(Pending.isInQueue(SU) && "bad ready count");
1499     Pending.remove(Pending.find(SU));
1500   }
1501 }
1502
1503 /// If this queue only has one ready candidate, return it. As a side effect,
1504 /// defer any nodes that now hit a hazard, and advance the cycle until at least
1505 /// one node is ready. If multiple instructions are ready, return NULL.
1506 SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
1507   if (CheckPending)
1508     releasePending();
1509
1510   if (IssueCount > 0) {
1511     // Defer any ready instrs that now have a hazard.
1512     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
1513       if (checkHazard(*I)) {
1514         Pending.push(*I);
1515         I = Available.remove(I);
1516         continue;
1517       }
1518       ++I;
1519     }
1520   }
1521   for (unsigned i = 0; Available.empty(); ++i) {
1522     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
1523            "permanent hazard"); (void)i;
1524     bumpCycle();
1525     releasePending();
1526   }
1527   if (Available.size() == 1)
1528     return *Available.begin();
1529   return NULL;
1530 }
1531
1532 /// Record the candidate policy for opposite zones with different critical
1533 /// resources.
1534 ///
1535 /// If the CriticalZone is latency limited, don't force a policy for the
1536 /// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
1537 void ConvergingScheduler::balanceZones(
1538   ConvergingScheduler::SchedBoundary &CriticalZone,
1539   ConvergingScheduler::SchedCandidate &CriticalCand,
1540   ConvergingScheduler::SchedBoundary &OppositeZone,
1541   ConvergingScheduler::SchedCandidate &OppositeCand) {
1542
1543   if (!CriticalZone.IsResourceLimited)
1544     return;
1545   assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
1546
1547   SchedRemainder *Rem = CriticalZone.Rem;
1548
1549   // If the critical zone is overconsuming a resource relative to the
1550   // remainder, try to reduce it.
1551   unsigned RemainingCritCount =
1552     Rem->RemainingCounts[CriticalZone.CritResIdx];
1553   if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
1554       > (int)SchedModel->getLatencyFactor()) {
1555     CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
1556     DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
1557           << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1558           << '\n');
1559   }
1560   // If the other zone is underconsuming a resource relative to the full zone,
1561   // try to increase it.
1562   unsigned OppositeCount =
1563     OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
1564   if ((int)(OppositeZone.ExpectedCount - OppositeCount)
1565       > (int)SchedModel->getLatencyFactor()) {
1566     OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
1567     DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
1568           << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
1569           << '\n');
1570   }
1571 }
1572
1573 /// Determine if the scheduled zones exceed resource limits or critical path and
1574 /// set each candidate's ReduceHeight policy accordingly.
1575 void ConvergingScheduler::checkResourceLimits(
1576   ConvergingScheduler::SchedCandidate &TopCand,
1577   ConvergingScheduler::SchedCandidate &BotCand) {
1578
1579   // Set ReduceLatency to true if needed.
1580   Bot.setLatencyPolicy(BotCand.Policy);
1581   Top.setLatencyPolicy(TopCand.Policy);
1582
1583   // Handle resource-limited regions.
1584   if (Top.IsResourceLimited && Bot.IsResourceLimited
1585       && Top.CritResIdx == Bot.CritResIdx) {
1586     // If the scheduled critical resource in both zones is no longer the
1587     // critical remaining resource, attempt to reduce resource height both ways.
1588     if (Top.CritResIdx != Rem.CritResIdx) {
1589       TopCand.Policy.ReduceResIdx = Top.CritResIdx;
1590       BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
1591       DEBUG(dbgs() << "Reduce scheduled "
1592             << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
1593     }
1594     return;
1595   }
1596   // Handle latency-limited regions.
1597   if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
1598     // If the total scheduled expected latency exceeds the region's critical
1599     // path then reduce latency both ways.
1600     //
1601     // Just because a zone is not resource limited does not mean it is latency
1602     // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle
1603     // to exceed expected latency.
1604     if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
1605         && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
1606       TopCand.Policy.ReduceLatency = true;
1607       BotCand.Policy.ReduceLatency = true;
1608       DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
1609             << " + " << Bot.ExpectedLatency << '\n');
1610     }
1611     return;
1612   }
1613   // The critical resource is different in each zone, so request balancing.
1614
1615   // Compute the cost of each zone.
1616   Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
1617   Top.ExpectedCount = std::max(
1618     Top.getCriticalCount(),
1619     Top.ExpectedCount * SchedModel->getLatencyFactor());
1620   Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
1621   Bot.ExpectedCount = std::max(
1622     Bot.getCriticalCount(),
1623     Bot.ExpectedCount * SchedModel->getLatencyFactor());
1624
1625   balanceZones(Top, TopCand, Bot, BotCand);
1626   balanceZones(Bot, BotCand, Top, TopCand);
1627 }
1628
1629 void ConvergingScheduler::SchedCandidate::
1630 initResourceDelta(const ScheduleDAGMI *DAG,
1631                   const TargetSchedModel *SchedModel) {
1632   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
1633     return;
1634
1635   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1636   for (TargetSchedModel::ProcResIter
1637          PI = SchedModel->getWriteProcResBegin(SC),
1638          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1639     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
1640       ResDelta.CritResources += PI->Cycles;
1641     if (PI->ProcResourceIdx == Policy.DemandResIdx)
1642       ResDelta.DemandedResources += PI->Cycles;
1643   }
1644 }
1645
1646 /// Return true if this heuristic determines order.
1647 static bool tryLess(unsigned TryVal, unsigned CandVal,
1648                     ConvergingScheduler::SchedCandidate &TryCand,
1649                     ConvergingScheduler::SchedCandidate &Cand,
1650                     ConvergingScheduler::CandReason Reason) {
1651   if (TryVal < CandVal) {
1652     TryCand.Reason = Reason;
1653     return true;
1654   }
1655   if (TryVal > CandVal) {
1656     if (Cand.Reason > Reason)
1657       Cand.Reason = Reason;
1658     return true;
1659   }
1660   return false;
1661 }
1662
1663 static bool tryGreater(unsigned TryVal, unsigned CandVal,
1664                        ConvergingScheduler::SchedCandidate &TryCand,
1665                        ConvergingScheduler::SchedCandidate &Cand,
1666                        ConvergingScheduler::CandReason Reason) {
1667   if (TryVal > CandVal) {
1668     TryCand.Reason = Reason;
1669     return true;
1670   }
1671   if (TryVal < CandVal) {
1672     if (Cand.Reason > Reason)
1673       Cand.Reason = Reason;
1674     return true;
1675   }
1676   return false;
1677 }
1678
1679 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
1680   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
1681 }
1682
1683 /// Apply a set of heursitics to a new candidate. Heuristics are currently
1684 /// hierarchical. This may be more efficient than a graduated cost model because
1685 /// we don't need to evaluate all aspects of the model for each node in the
1686 /// queue. But it's really done to make the heuristics easier to debug and
1687 /// statistically analyze.
1688 ///
1689 /// \param Cand provides the policy and current best candidate.
1690 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
1691 /// \param Zone describes the scheduled zone that we are extending.
1692 /// \param RPTracker describes reg pressure within the scheduled zone.
1693 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
1694 void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
1695                                        SchedCandidate &TryCand,
1696                                        SchedBoundary &Zone,
1697                                        const RegPressureTracker &RPTracker,
1698                                        RegPressureTracker &TempTracker) {
1699
1700   // Always initialize TryCand's RPDelta.
1701   TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
1702                                   DAG->getRegionCriticalPSets(),
1703                                   DAG->getRegPressure().MaxSetPressure);
1704
1705   // Initialize the candidate if needed.
1706   if (!Cand.isValid()) {
1707     TryCand.Reason = NodeOrder;
1708     return;
1709   }
1710   // Avoid exceeding the target's limit.
1711   if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
1712               Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
1713     return;
1714   if (Cand.Reason == SingleExcess)
1715     Cand.Reason = MultiPressure;
1716
1717   // Avoid increasing the max critical pressure in the scheduled region.
1718   if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
1719               Cand.RPDelta.CriticalMax.UnitIncrease,
1720               TryCand, Cand, SingleCritical))
1721     return;
1722   if (Cand.Reason == SingleCritical)
1723     Cand.Reason = MultiPressure;
1724
1725   // Keep clustered nodes together to encourage downstream peephole
1726   // optimizations which may reduce resource requirements.
1727   //
1728   // This is a best effort to set things up for a post-RA pass. Optimizations
1729   // like generating loads of multiple registers should ideally be done within
1730   // the scheduler pass by combining the loads during DAG postprocessing.
1731   const SUnit *NextClusterSU =
1732     Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
1733   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
1734                  TryCand, Cand, Cluster))
1735     return;
1736   // Currently, weak edges are for clustering, so we hard-code that reason.
1737   // However, deferring the current TryCand will not change Cand's reason.
1738   CandReason OrigReason = Cand.Reason;
1739   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
1740               getWeakLeft(Cand.SU, Zone.isTop()),
1741               TryCand, Cand, Cluster)) {
1742     Cand.Reason = OrigReason;
1743     return;
1744   }
1745   // Avoid critical resource consumption and balance the schedule.
1746   TryCand.initResourceDelta(DAG, SchedModel);
1747   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
1748               TryCand, Cand, ResourceReduce))
1749     return;
1750   if (tryGreater(TryCand.ResDelta.DemandedResources,
1751                  Cand.ResDelta.DemandedResources,
1752                  TryCand, Cand, ResourceDemand))
1753     return;
1754
1755   // Avoid serializing long latency dependence chains.
1756   if (Cand.Policy.ReduceLatency) {
1757     if (Zone.isTop()) {
1758       if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
1759           > Zone.ExpectedCount) {
1760         if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1761                     TryCand, Cand, TopDepthReduce))
1762           return;
1763       }
1764       if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1765                      TryCand, Cand, TopPathReduce))
1766         return;
1767     }
1768     else {
1769       if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
1770           > Zone.ExpectedCount) {
1771         if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1772                     TryCand, Cand, BotHeightReduce))
1773           return;
1774       }
1775       if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1776                      TryCand, Cand, BotPathReduce))
1777         return;
1778     }
1779   }
1780
1781   // Avoid increasing the max pressure of the entire region.
1782   if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
1783               Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
1784     return;
1785   if (Cand.Reason == SingleMax)
1786     Cand.Reason = MultiPressure;
1787
1788   // Prefer immediate defs/users of the last scheduled instruction. This is a
1789   // nice pressure avoidance strategy that also conserves the processor's
1790   // register renaming resources and keeps the machine code readable.
1791   if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
1792                  TryCand, Cand, NextDefUse))
1793     return;
1794
1795   // Fall through to original instruction order.
1796   if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
1797       || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
1798     TryCand.Reason = NodeOrder;
1799   }
1800 }
1801
1802 /// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
1803 /// more desirable than RHS from scheduling standpoint.
1804 static bool compareRPDelta(const RegPressureDelta &LHS,
1805                            const RegPressureDelta &RHS) {
1806   // Compare each component of pressure in decreasing order of importance
1807   // without checking if any are valid. Invalid PressureElements are assumed to
1808   // have UnitIncrease==0, so are neutral.
1809
1810   // Avoid increasing the max critical pressure in the scheduled region.
1811   if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
1812     DEBUG(dbgs() << "RP excess top - bot: "
1813           << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
1814     return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
1815   }
1816   // Avoid increasing the max critical pressure in the scheduled region.
1817   if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
1818     DEBUG(dbgs() << "RP critical top - bot: "
1819           << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
1820           << '\n');
1821     return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
1822   }
1823   // Avoid increasing the max pressure of the entire region.
1824   if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
1825     DEBUG(dbgs() << "RP current top - bot: "
1826           << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
1827           << '\n');
1828     return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
1829   }
1830   return false;
1831 }
1832
1833 #ifndef NDEBUG
1834 const char *ConvergingScheduler::getReasonStr(
1835   ConvergingScheduler::CandReason Reason) {
1836   switch (Reason) {
1837   case NoCand:         return "NOCAND    ";
1838   case SingleExcess:   return "REG-EXCESS";
1839   case SingleCritical: return "REG-CRIT  ";
1840   case Cluster:        return "CLUSTER   ";
1841   case SingleMax:      return "REG-MAX   ";
1842   case MultiPressure:  return "REG-MULTI ";
1843   case ResourceReduce: return "RES-REDUCE";
1844   case ResourceDemand: return "RES-DEMAND";
1845   case TopDepthReduce: return "TOP-DEPTH ";
1846   case TopPathReduce:  return "TOP-PATH  ";
1847   case BotHeightReduce:return "BOT-HEIGHT";
1848   case BotPathReduce:  return "BOT-PATH  ";
1849   case NextDefUse:     return "DEF-USE   ";
1850   case NodeOrder:      return "ORDER     ";
1851   };
1852   llvm_unreachable("Unknown reason!");
1853 }
1854
1855 void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
1856                                          const SchedBoundary &Zone) {
1857   const char *Label = getReasonStr(Cand.Reason);
1858   PressureElement P;
1859   unsigned ResIdx = 0;
1860   unsigned Latency = 0;
1861   switch (Cand.Reason) {
1862   default:
1863     break;
1864   case SingleExcess:
1865     P = Cand.RPDelta.Excess;
1866     break;
1867   case SingleCritical:
1868     P = Cand.RPDelta.CriticalMax;
1869     break;
1870   case SingleMax:
1871     P = Cand.RPDelta.CurrentMax;
1872     break;
1873   case ResourceReduce:
1874     ResIdx = Cand.Policy.ReduceResIdx;
1875     break;
1876   case ResourceDemand:
1877     ResIdx = Cand.Policy.DemandResIdx;
1878     break;
1879   case TopDepthReduce:
1880     Latency = Cand.SU->getDepth();
1881     break;
1882   case TopPathReduce:
1883     Latency = Cand.SU->getHeight();
1884     break;
1885   case BotHeightReduce:
1886     Latency = Cand.SU->getHeight();
1887     break;
1888   case BotPathReduce:
1889     Latency = Cand.SU->getDepth();
1890     break;
1891   }
1892   dbgs() << Label << " " << Zone.Available.getName() << " ";
1893   if (P.isValid())
1894     dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
1895            << " ";
1896   else
1897     dbgs() << "     ";
1898   if (ResIdx)
1899     dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
1900   else
1901     dbgs() << "        ";
1902   if (Latency)
1903     dbgs() << Latency << " cycles ";
1904   else
1905     dbgs() << "         ";
1906   Cand.SU->dump(DAG);
1907 }
1908 #endif
1909
1910 /// Pick the best candidate from the top queue.
1911 ///
1912 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
1913 /// DAG building. To adjust for the current scheduling location we need to
1914 /// maintain the number of vreg uses remaining to be top-scheduled.
1915 void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
1916                                             const RegPressureTracker &RPTracker,
1917                                             SchedCandidate &Cand) {
1918   ReadyQueue &Q = Zone.Available;
1919
1920   DEBUG(Q.dump());
1921
1922   // getMaxPressureDelta temporarily modifies the tracker.
1923   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
1924
1925   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
1926
1927     SchedCandidate TryCand(Cand.Policy);
1928     TryCand.SU = *I;
1929     tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
1930     if (TryCand.Reason != NoCand) {
1931       // Initialize resource delta if needed in case future heuristics query it.
1932       if (TryCand.ResDelta == SchedResourceDelta())
1933         TryCand.initResourceDelta(DAG, SchedModel);
1934       Cand.setBest(TryCand);
1935       DEBUG(traceCandidate(Cand, Zone));
1936     }
1937   }
1938 }
1939
1940 static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
1941                       bool IsTop) {
1942   DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
1943         << " SU(" << Cand.SU->NodeNum << ") "
1944         << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
1945 }
1946
1947 /// Pick the best candidate node from either the top or bottom queue.
1948 SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
1949   // Schedule as far as possible in the direction of no choice. This is most
1950   // efficient, but also provides the best heuristics for CriticalPSets.
1951   if (SUnit *SU = Bot.pickOnlyChoice()) {
1952     IsTopNode = false;
1953     return SU;
1954   }
1955   if (SUnit *SU = Top.pickOnlyChoice()) {
1956     IsTopNode = true;
1957     return SU;
1958   }
1959   CandPolicy NoPolicy;
1960   SchedCandidate BotCand(NoPolicy);
1961   SchedCandidate TopCand(NoPolicy);
1962   checkResourceLimits(TopCand, BotCand);
1963
1964   // Prefer bottom scheduling when heuristics are silent.
1965   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
1966   assert(BotCand.Reason != NoCand && "failed to find the first candidate");
1967
1968   // If either Q has a single candidate that provides the least increase in
1969   // Excess pressure, we can immediately schedule from that Q.
1970   //
1971   // RegionCriticalPSets summarizes the pressure within the scheduled region and
1972   // affects picking from either Q. If scheduling in one direction must
1973   // increase pressure for one of the excess PSets, then schedule in that
1974   // direction first to provide more freedom in the other direction.
1975   if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
1976     IsTopNode = false;
1977     tracePick(BotCand, IsTopNode);
1978     return BotCand.SU;
1979   }
1980   // Check if the top Q has a better candidate.
1981   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
1982   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
1983
1984   // If either Q has a single candidate that minimizes pressure above the
1985   // original region's pressure pick it.
1986   if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
1987     if (TopCand.Reason < BotCand.Reason) {
1988       IsTopNode = true;
1989       tracePick(TopCand, IsTopNode);
1990       return TopCand.SU;
1991     }
1992     IsTopNode = false;
1993     tracePick(BotCand, IsTopNode);
1994     return BotCand.SU;
1995   }
1996   // Check for a salient pressure difference and pick the best from either side.
1997   if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
1998     IsTopNode = true;
1999     tracePick(TopCand, IsTopNode);
2000     return TopCand.SU;
2001   }
2002   // Otherwise prefer the bottom candidate, in node order if all else failed.
2003   if (TopCand.Reason < BotCand.Reason) {
2004     IsTopNode = true;
2005     tracePick(TopCand, IsTopNode);
2006     return TopCand.SU;
2007   }
2008   IsTopNode = false;
2009   tracePick(BotCand, IsTopNode);
2010   return BotCand.SU;
2011 }
2012
2013 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
2014 SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
2015   if (DAG->top() == DAG->bottom()) {
2016     assert(Top.Available.empty() && Top.Pending.empty() &&
2017            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
2018     return NULL;
2019   }
2020   SUnit *SU;
2021   do {
2022     if (ForceTopDown) {
2023       SU = Top.pickOnlyChoice();
2024       if (!SU) {
2025         CandPolicy NoPolicy;
2026         SchedCandidate TopCand(NoPolicy);
2027         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2028         assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2029         SU = TopCand.SU;
2030       }
2031       IsTopNode = true;
2032     }
2033     else if (ForceBottomUp) {
2034       SU = Bot.pickOnlyChoice();
2035       if (!SU) {
2036         CandPolicy NoPolicy;
2037         SchedCandidate BotCand(NoPolicy);
2038         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2039         assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2040         SU = BotCand.SU;
2041       }
2042       IsTopNode = false;
2043     }
2044     else {
2045       SU = pickNodeBidirectional(IsTopNode);
2046     }
2047   } while (SU->isScheduled);
2048
2049   if (SU->isTopReady())
2050     Top.removeReady(SU);
2051   if (SU->isBottomReady())
2052     Bot.removeReady(SU);
2053
2054   DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
2055         << " Scheduling Instruction in cycle "
2056         << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
2057         SU->dump(DAG));
2058   return SU;
2059 }
2060
2061 /// Update the scheduler's state after scheduling a node. This is the same node
2062 /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
2063 /// it's state based on the current cycle before MachineSchedStrategy does.
2064 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2065   if (IsTopNode) {
2066     SU->TopReadyCycle = Top.CurrCycle;
2067     Top.bumpNode(SU);
2068   }
2069   else {
2070     SU->BotReadyCycle = Bot.CurrCycle;
2071     Bot.bumpNode(SU);
2072   }
2073 }
2074
2075 /// Create the standard converging machine scheduler. This will be used as the
2076 /// default scheduler if the target does not set a default.
2077 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
2078   assert((!ForceTopDown || !ForceBottomUp) &&
2079          "-misched-topdown incompatible with -misched-bottomup");
2080   ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
2081   // Register DAG post-processors.
2082   if (EnableLoadCluster)
2083     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
2084   if (EnableMacroFusion)
2085     DAG->addMutation(new MacroFusion(DAG->TII));
2086   return DAG;
2087 }
2088 static MachineSchedRegistry
2089 ConvergingSchedRegistry("converge", "Standard converging scheduler.",
2090                         createConvergingSched);
2091
2092 //===----------------------------------------------------------------------===//
2093 // ILP Scheduler. Currently for experimental analysis of heuristics.
2094 //===----------------------------------------------------------------------===//
2095
2096 namespace {
2097 /// \brief Order nodes by the ILP metric.
2098 struct ILPOrder {
2099   const SchedDFSResult *DFSResult;
2100   const BitVector *ScheduledTrees;
2101   bool MaximizeILP;
2102
2103   ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
2104
2105   /// \brief Apply a less-than relation on node priority.
2106   ///
2107   /// (Return true if A comes after B in the Q.)
2108   bool operator()(const SUnit *A, const SUnit *B) const {
2109     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
2110     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
2111     if (SchedTreeA != SchedTreeB) {
2112       // Unscheduled trees have lower priority.
2113       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
2114         return ScheduledTrees->test(SchedTreeB);
2115
2116       // Trees with shallower connections have have lower priority.
2117       if (DFSResult->getSubtreeLevel(SchedTreeA)
2118           != DFSResult->getSubtreeLevel(SchedTreeB)) {
2119         return DFSResult->getSubtreeLevel(SchedTreeA)
2120           < DFSResult->getSubtreeLevel(SchedTreeB);
2121       }
2122     }
2123     if (MaximizeILP)
2124       return DFSResult->getILP(A) < DFSResult->getILP(B);
2125     else
2126       return DFSResult->getILP(A) > DFSResult->getILP(B);
2127   }
2128 };
2129
2130 /// \brief Schedule based on the ILP metric.
2131 class ILPScheduler : public MachineSchedStrategy {
2132   /// In case all subtrees are eventually connected to a common root through
2133   /// data dependence (e.g. reduction), place an upper limit on their size.
2134   ///
2135   /// FIXME: A subtree limit is generally good, but in the situation commented
2136   /// above, where multiple similar subtrees feed a common root, we should
2137   /// only split at a point where the resulting subtrees will be balanced.
2138   /// (a motivating test case must be found).
2139   static const unsigned SubtreeLimit = 16;
2140
2141   ScheduleDAGMI *DAG;
2142   ILPOrder Cmp;
2143
2144   std::vector<SUnit*> ReadyQ;
2145 public:
2146   ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
2147
2148   virtual void initialize(ScheduleDAGMI *dag) {
2149     DAG = dag;
2150     DAG->computeDFSResult();
2151     Cmp.DFSResult = DAG->getDFSResult();
2152     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
2153     ReadyQ.clear();
2154   }
2155
2156   virtual void registerRoots() {
2157     // Restore the heap in ReadyQ with the updated DFS results.
2158     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2159   }
2160
2161   /// Implement MachineSchedStrategy interface.
2162   /// -----------------------------------------
2163
2164   /// Callback to select the highest priority node from the ready Q.
2165   virtual SUnit *pickNode(bool &IsTopNode) {
2166     if (ReadyQ.empty()) return NULL;
2167     pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2168     SUnit *SU = ReadyQ.back();
2169     ReadyQ.pop_back();
2170     IsTopNode = false;
2171     DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
2172           << *SU->getInstr()
2173           << " ILP: " << DAG->getDFSResult()->getILP(SU)
2174           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
2175           << DAG->getDFSResult()->getSubtreeLevel(
2176             DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
2177     return SU;
2178   }
2179
2180   /// \brief Scheduler callback to notify that a new subtree is scheduled.
2181   virtual void scheduleTree(unsigned SubtreeID) {
2182     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2183   }
2184
2185   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
2186   /// DFSResults, and resort the priority Q.
2187   virtual void schedNode(SUnit *SU, bool IsTopNode) {
2188     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
2189   }
2190
2191   virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
2192
2193   virtual void releaseBottomNode(SUnit *SU) {
2194     ReadyQ.push_back(SU);
2195     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2196   }
2197 };
2198 } // namespace
2199
2200 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
2201   return new ScheduleDAGMI(C, new ILPScheduler(true));
2202 }
2203 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
2204   return new ScheduleDAGMI(C, new ILPScheduler(false));
2205 }
2206 static MachineSchedRegistry ILPMaxRegistry(
2207   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
2208 static MachineSchedRegistry ILPMinRegistry(
2209   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
2210
2211 //===----------------------------------------------------------------------===//
2212 // Machine Instruction Shuffler for Correctness Testing
2213 //===----------------------------------------------------------------------===//
2214
2215 #ifndef NDEBUG
2216 namespace {
2217 /// Apply a less-than relation on the node order, which corresponds to the
2218 /// instruction order prior to scheduling. IsReverse implements greater-than.
2219 template<bool IsReverse>
2220 struct SUnitOrder {
2221   bool operator()(SUnit *A, SUnit *B) const {
2222     if (IsReverse)
2223       return A->NodeNum > B->NodeNum;
2224     else
2225       return A->NodeNum < B->NodeNum;
2226   }
2227 };
2228
2229 /// Reorder instructions as much as possible.
2230 class InstructionShuffler : public MachineSchedStrategy {
2231   bool IsAlternating;
2232   bool IsTopDown;
2233
2234   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
2235   // gives nodes with a higher number higher priority causing the latest
2236   // instructions to be scheduled first.
2237   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
2238     TopQ;
2239   // When scheduling bottom-up, use greater-than as the queue priority.
2240   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
2241     BottomQ;
2242 public:
2243   InstructionShuffler(bool alternate, bool topdown)
2244     : IsAlternating(alternate), IsTopDown(topdown) {}
2245
2246   virtual void initialize(ScheduleDAGMI *) {
2247     TopQ.clear();
2248     BottomQ.clear();
2249   }
2250
2251   /// Implement MachineSchedStrategy interface.
2252   /// -----------------------------------------
2253
2254   virtual SUnit *pickNode(bool &IsTopNode) {
2255     SUnit *SU;
2256     if (IsTopDown) {
2257       do {
2258         if (TopQ.empty()) return NULL;
2259         SU = TopQ.top();
2260         TopQ.pop();
2261       } while (SU->isScheduled);
2262       IsTopNode = true;
2263     }
2264     else {
2265       do {
2266         if (BottomQ.empty()) return NULL;
2267         SU = BottomQ.top();
2268         BottomQ.pop();
2269       } while (SU->isScheduled);
2270       IsTopNode = false;
2271     }
2272     if (IsAlternating)
2273       IsTopDown = !IsTopDown;
2274     return SU;
2275   }
2276
2277   virtual void schedNode(SUnit *SU, bool IsTopNode) {}
2278
2279   virtual void releaseTopNode(SUnit *SU) {
2280     TopQ.push(SU);
2281   }
2282   virtual void releaseBottomNode(SUnit *SU) {
2283     BottomQ.push(SU);
2284   }
2285 };
2286 } // namespace
2287
2288 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
2289   bool Alternate = !ForceTopDown && !ForceBottomUp;
2290   bool TopDown = !ForceBottomUp;
2291   assert((TopDown || !ForceTopDown) &&
2292          "-misched-topdown incompatible with -misched-bottomup");
2293   return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
2294 }
2295 static MachineSchedRegistry ShufflerRegistry(
2296   "shuffle", "Shuffle machine instructions alternating directions",
2297   createInstructionShuffler);
2298 #endif // !NDEBUG
2299
2300 //===----------------------------------------------------------------------===//
2301 // GraphWriter support for ScheduleDAGMI.
2302 //===----------------------------------------------------------------------===//
2303
2304 #ifndef NDEBUG
2305 namespace llvm {
2306
2307 template<> struct GraphTraits<
2308   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
2309
2310 template<>
2311 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
2312
2313   DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
2314
2315   static std::string getGraphName(const ScheduleDAG *G) {
2316     return G->MF.getName();
2317   }
2318
2319   static bool renderGraphFromBottomUp() {
2320     return true;
2321   }
2322
2323   static bool isNodeHidden(const SUnit *Node) {
2324     return (Node->NumPreds > 10 || Node->NumSuccs > 10);
2325   }
2326
2327   static bool hasNodeAddressLabel(const SUnit *Node,
2328                                   const ScheduleDAG *Graph) {
2329     return false;
2330   }
2331
2332   /// If you want to override the dot attributes printed for a particular
2333   /// edge, override this method.
2334   static std::string getEdgeAttributes(const SUnit *Node,
2335                                        SUnitIterator EI,
2336                                        const ScheduleDAG *Graph) {
2337     if (EI.isArtificialDep())
2338       return "color=cyan,style=dashed";
2339     if (EI.isCtrlDep())
2340       return "color=blue,style=dashed";
2341     return "";
2342   }
2343
2344   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
2345     std::string Str;
2346     raw_string_ostream SS(Str);
2347     SS << "SU(" << SU->NodeNum << ')';
2348     return SS.str();
2349   }
2350   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
2351     return G->getGraphNodeLabel(SU);
2352   }
2353
2354   static std::string getNodeAttributes(const SUnit *N,
2355                                        const ScheduleDAG *Graph) {
2356     std::string Str("shape=Mrecord");
2357     const SchedDFSResult *DFS =
2358       static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
2359     if (DFS) {
2360       Str += ",style=filled,fillcolor=\"#";
2361       Str += DOT::getColorString(DFS->getSubtreeID(N));
2362       Str += '"';
2363     }
2364     return Str;
2365   }
2366 };
2367 } // namespace llvm
2368 #endif // NDEBUG
2369
2370 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
2371 /// rendered using 'dot'.
2372 ///
2373 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
2374 #ifndef NDEBUG
2375   ViewGraph(this, Name, false, Title);
2376 #else
2377   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
2378          << "systems with Graphviz or gv!\n";
2379 #endif  // NDEBUG
2380 }
2381
2382 /// Out-of-line implementation with no arguments is handy for gdb.
2383 void ScheduleDAGMI::viewGraph() {
2384   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
2385 }