lib/CodeGen/MachineScheduler.cpp

   1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // MachineScheduler schedules machine instructions after phi elimination. It
  11 // preserves LiveIntervals so it can be invoked before register allocation.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "misched"
  16
  17 #include "llvm/CodeGen/MachineScheduler.h"
  18 #include "llvm/ADT/PriorityQueue.h"
  19 #include "llvm/Analysis/AliasAnalysis.h"
  20 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  21 #include "llvm/CodeGen/MachineDominators.h"
  22 #include "llvm/CodeGen/MachineLoopInfo.h"
  23 #include "llvm/CodeGen/MachineRegisterInfo.h"
  24 #include "llvm/CodeGen/Passes.h"
  25 #include "llvm/CodeGen/RegisterClassInfo.h"
  26 #include "llvm/CodeGen/ScheduleDFS.h"
  27 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  28 #include "llvm/Support/CommandLine.h"
  29 #include "llvm/Support/Debug.h"
  30 #include "llvm/Support/ErrorHandling.h"
  31 #include "llvm/Support/GraphWriter.h"
  32 #include "llvm/Support/raw_ostream.h"
  33 #include "llvm/Target/TargetInstrInfo.h"
  34 #include <queue>
  35
  36 using namespace llvm;
  37
  38 namespace llvm {
  39 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
  40                            cl::desc("Force top-down list scheduling"));
  41 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
  42                             cl::desc("Force bottom-up list scheduling"));
  43 }
  44
  45 #ifndef NDEBUG
  46 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  47   cl::desc("Pop up a window to show MISched dags after they are processed"));
  48
  49 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  50   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
  51
  52 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  53   cl::desc("Only schedule this function"));
  54 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
  55   cl::desc("Only schedule this MBB#"));
  56 #else
  57 static bool ViewMISchedDAGs = false;
  58 #endif // NDEBUG
  59
  60 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  61   cl::desc("Enable register pressure scheduling."), cl::init(true));
  62
  63 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  64   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
  65
  66 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  67   cl::desc("Enable load clustering."), cl::init(true));
  68
  69 // Experimental heuristics
  70 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  71   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
  72
  73 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  74   cl::desc("Verify machine instrs before and after machine scheduling"));
  75
  76 // DAG subtrees must have at least this many nodes.
  77 static const unsigned MinSubtreeSize = 8;
  78
  79 // Pin the vtables to this file.
  80 void MachineSchedStrategy::anchor() {}
  81 void ScheduleDAGMutation::anchor() {}
  82
  83 //===----------------------------------------------------------------------===//
  84 // Machine Instruction Scheduling Pass and Registry
  85 //===----------------------------------------------------------------------===//
  86
  87 MachineSchedContext::MachineSchedContext():
  88     MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  89   RegClassInfo = new RegisterClassInfo();
  90 }
  91
  92 MachineSchedContext::~MachineSchedContext() {
  93   delete RegClassInfo;
  94 }
  95
  96 namespace {
  97 /// Base class for a machine scheduler class that can run at any point.
  98 class MachineSchedulerBase : public MachineSchedContext,
  99                              public MachineFunctionPass {
 100 public:
 101   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
 102
 103   void print(raw_ostream &O, const Module* = 0) const override;
 104
 105 protected:
 106   void scheduleRegions(ScheduleDAGInstrs &Scheduler);
 107 };
 108
 109 /// MachineScheduler runs after coalescing and before register allocation.
 110 class MachineScheduler : public MachineSchedulerBase {
 111 public:
 112   MachineScheduler();
 113
 114   void getAnalysisUsage(AnalysisUsage &AU) const override;
 115
 116   bool runOnMachineFunction(MachineFunction&) override;
 117
 118   static char ID; // Class identification, replacement for typeinfo
 119
 120 protected:
 121   ScheduleDAGInstrs *createMachineScheduler();
 122 };
 123
 124 /// PostMachineScheduler runs after shortly before code emission.
 125 class PostMachineScheduler : public MachineSchedulerBase {
 126 public:
 127   PostMachineScheduler();
 128
 129   void getAnalysisUsage(AnalysisUsage &AU) const override;
 130
 131   bool runOnMachineFunction(MachineFunction&) override;
 132
 133   static char ID; // Class identification, replacement for typeinfo
 134
 135 protected:
 136   ScheduleDAGInstrs *createPostMachineScheduler();
 137 };
 138 } // namespace
 139
 140 char MachineScheduler::ID = 0;
 141
 142 char &llvm::MachineSchedulerID = MachineScheduler::ID;
 143
 144 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
 145                       "Machine Instruction Scheduler", false, false)
 146 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 147 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 148 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 149 INITIALIZE_PASS_END(MachineScheduler, "misched",
 150                     "Machine Instruction Scheduler", false, false)
 151
 152 MachineScheduler::MachineScheduler()
 153 : MachineSchedulerBase(ID) {
 154   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
 155 }
 156
 157 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
 158   AU.setPreservesCFG();
 159   AU.addRequiredID(MachineDominatorsID);
 160   AU.addRequired<MachineLoopInfo>();
 161   AU.addRequired<AliasAnalysis>();
 162   AU.addRequired<TargetPassConfig>();
 163   AU.addRequired<SlotIndexes>();
 164   AU.addPreserved<SlotIndexes>();
 165   AU.addRequired<LiveIntervals>();
 166   AU.addPreserved<LiveIntervals>();
 167   MachineFunctionPass::getAnalysisUsage(AU);
 168 }
 169
 170 char PostMachineScheduler::ID = 0;
 171
 172 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
 173
 174 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
 175                 "PostRA Machine Instruction Scheduler", false, false)
 176
 177 PostMachineScheduler::PostMachineScheduler()
 178 : MachineSchedulerBase(ID) {
 179   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
 180 }
 181
 182 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
 183   AU.setPreservesCFG();
 184   AU.addRequiredID(MachineDominatorsID);
 185   AU.addRequired<MachineLoopInfo>();
 186   AU.addRequired<TargetPassConfig>();
 187   MachineFunctionPass::getAnalysisUsage(AU);
 188 }
 189
 190 MachinePassRegistry MachineSchedRegistry::Registry;
 191
 192 /// A dummy default scheduler factory indicates whether the scheduler
 193 /// is overridden on the command line.
 194 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
 195   return 0;
 196 }
 197
 198 /// MachineSchedOpt allows command line selection of the scheduler.
 199 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
 200                RegisterPassParser<MachineSchedRegistry> >
 201 MachineSchedOpt("misched",
 202                 cl::init(&useDefaultMachineSched), cl::Hidden,
 203                 cl::desc("Machine instruction scheduler to use"));
 204
 205 static MachineSchedRegistry
 206 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
 207                      useDefaultMachineSched);
 208
 209 /// Forward declare the standard machine scheduler. This will be used as the
 210 /// default scheduler if the target does not set a default.
 211 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
 212 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
 213
 214 /// Decrement this iterator until reaching the top or a non-debug instr.
 215 static MachineBasicBlock::const_iterator
 216 priorNonDebug(MachineBasicBlock::const_iterator I,
 217               MachineBasicBlock::const_iterator Beg) {
 218   assert(I != Beg && "reached the top of the region, cannot decrement");
 219   while (--I != Beg) {
 220     if (!I->isDebugValue())
 221       break;
 222   }
 223   return I;
 224 }
 225
 226 /// Non-const version.
 227 static MachineBasicBlock::iterator
 228 priorNonDebug(MachineBasicBlock::iterator I,
 229               MachineBasicBlock::const_iterator Beg) {
 230   return const_cast<MachineInstr*>(
 231     &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
 232 }
 233
 234 /// If this iterator is a debug value, increment until reaching the End or a
 235 /// non-debug instruction.
 236 static MachineBasicBlock::const_iterator
 237 nextIfDebug(MachineBasicBlock::const_iterator I,
 238             MachineBasicBlock::const_iterator End) {
 239   for(; I != End; ++I) {
 240     if (!I->isDebugValue())
 241       break;
 242   }
 243   return I;
 244 }
 245
 246 /// Non-const version.
 247 static MachineBasicBlock::iterator
 248 nextIfDebug(MachineBasicBlock::iterator I,
 249             MachineBasicBlock::const_iterator End) {
 250   // Cast the return value to nonconst MachineInstr, then cast to an
 251   // instr_iterator, which does not check for null, finally return a
 252   // bundle_iterator.
 253   return MachineBasicBlock::instr_iterator(
 254     const_cast<MachineInstr*>(
 255       &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
 256 }
 257
 258 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
 259 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
 260   // Select the scheduler, or set the default.
 261   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
 262   if (Ctor != useDefaultMachineSched)
 263     return Ctor(this);
 264
 265   // Get the default scheduler set by the target for this function.
 266   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
 267   if (Scheduler)
 268     return Scheduler;
 269
 270   // Default to GenericScheduler.
 271   return createGenericSchedLive(this);
 272 }
 273
 274 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
 275 /// the caller. We don't have a command line option to override the postRA
 276 /// scheduler. The Target must configure it.
 277 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
 278   // Get the postRA scheduler set by the target for this function.
 279   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
 280   if (Scheduler)
 281     return Scheduler;
 282
 283   // Default to GenericScheduler.
 284   return createGenericSchedPostRA(this);
 285 }
 286
 287 /// Top-level MachineScheduler pass driver.
 288 ///
 289 /// Visit blocks in function order. Divide each block into scheduling regions
 290 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
 291 /// consistent with the DAG builder, which traverses the interior of the
 292 /// scheduling regions bottom-up.
 293 ///
 294 /// This design avoids exposing scheduling boundaries to the DAG builder,
 295 /// simplifying the DAG builder's support for "special" target instructions.
 296 /// At the same time the design allows target schedulers to operate across
 297 /// scheduling boundaries, for example to bundle the boudary instructions
 298 /// without reordering them. This creates complexity, because the target
 299 /// scheduler must update the RegionBegin and RegionEnd positions cached by
 300 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
 301 /// design would be to split blocks at scheduling boundaries, but LLVM has a
 302 /// general bias against block splitting purely for implementation simplicity.
 303 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 304   DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
 305
 306   // Initialize the context of the pass.
 307   MF = &mf;
 308   MLI = &getAnalysis<MachineLoopInfo>();
 309   MDT = &getAnalysis<MachineDominatorTree>();
 310   PassConfig = &getAnalysis<TargetPassConfig>();
 311   AA = &getAnalysis<AliasAnalysis>();
 312
 313   LIS = &getAnalysis<LiveIntervals>();
 314
 315   if (VerifyScheduling) {
 316     DEBUG(LIS->dump());
 317     MF->verify(this, "Before machine scheduling.");
 318   }
 319   RegClassInfo->runOnMachineFunction(*MF);
 320
 321   // Instantiate the selected scheduler for this target, function, and
 322   // optimization level.
 323   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
 324   scheduleRegions(*Scheduler);
 325
 326   DEBUG(LIS->dump());
 327   if (VerifyScheduling)
 328     MF->verify(this, "After machine scheduling.");
 329   return true;
 330 }
 331
 332 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 333   if (skipOptnoneFunction(*mf.getFunction()))
 334     return false;
 335
 336   DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
 337
 338   // Initialize the context of the pass.
 339   MF = &mf;
 340   PassConfig = &getAnalysis<TargetPassConfig>();
 341
 342   if (VerifyScheduling)
 343     MF->verify(this, "Before post machine scheduling.");
 344
 345   // Instantiate the selected scheduler for this target, function, and
 346   // optimization level.
 347   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
 348   scheduleRegions(*Scheduler);
 349
 350   if (VerifyScheduling)
 351     MF->verify(this, "After post machine scheduling.");
 352   return true;
 353 }
 354
 355 /// Return true of the given instruction should not be included in a scheduling
 356 /// region.
 357 ///
 358 /// MachineScheduler does not currently support scheduling across calls. To
 359 /// handle calls, the DAG builder needs to be modified to create register
 360 /// anti/output dependencies on the registers clobbered by the call's regmask
 361 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
 362 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
 363 /// the boundary, but there would be no benefit to postRA scheduling across
 364 /// calls this late anyway.
 365 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
 366                             MachineBasicBlock *MBB,
 367                             MachineFunction *MF,
 368                             const TargetInstrInfo *TII,
 369                             bool IsPostRA) {
 370   return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
 371 }
 372
 373 /// Main driver for both MachineScheduler and PostMachineScheduler.
 374 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
 375   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 376   bool IsPostRA = Scheduler.isPostRA();
 377
 378   // Visit all machine basic blocks.
 379   //
 380   // TODO: Visit blocks in global postorder or postorder within the bottom-up
 381   // loop tree. Then we can optionally compute global RegPressure.
 382   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
 383        MBB != MBBEnd; ++MBB) {
 384
 385     Scheduler.startBlock(MBB);
 386
 387 #ifndef NDEBUG
 388     if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
 389       continue;
 390     if (SchedOnlyBlock.getNumOccurrences()
 391         && (int)SchedOnlyBlock != MBB->getNumber())
 392       continue;
 393 #endif
 394
 395     // Break the block into scheduling regions [I, RegionEnd), and schedule each
 396     // region as soon as it is discovered. RegionEnd points the scheduling
 397     // boundary at the bottom of the region. The DAG does not include RegionEnd,
 398     // but the region does (i.e. the next RegionEnd is above the previous
 399     // RegionBegin). If the current block has no terminator then RegionEnd ==
 400     // MBB->end() for the bottom region.
 401     //
 402     // The Scheduler may insert instructions during either schedule() or
 403     // exitRegion(), even for empty regions. So the local iterators 'I' and
 404     // 'RegionEnd' are invalid across these calls.
 405     //
 406     // MBB::size() uses instr_iterator to count. Here we need a bundle to count
 407     // as a single instruction.
 408     unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
 409     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
 410         RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
 411
 412       // Avoid decrementing RegionEnd for blocks with no terminator.
 413       if (RegionEnd != MBB->end() ||
 414           isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) {
 415         --RegionEnd;
 416         // Count the boundary instruction.
 417         --RemainingInstrs;
 418       }
 419
 420       // The next region starts above the previous region. Look backward in the
 421       // instruction stream until we find the nearest boundary.
 422       unsigned NumRegionInstrs = 0;
 423       MachineBasicBlock::iterator I = RegionEnd;
 424       for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
 425         if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
 426           break;
 427       }
 428       // Notify the scheduler of the region, even if we may skip scheduling
 429       // it. Perhaps it still needs to be bundled.
 430       Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
 431
 432       // Skip empty scheduling regions (0 or 1 schedulable instructions).
 433       if (I == RegionEnd || I == std::prev(RegionEnd)) {
 434         // Close the current region. Bundle the terminator if needed.
 435         // This invalidates 'RegionEnd' and 'I'.
 436         Scheduler.exitRegion();
 437         continue;
 438       }
 439       DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "")
 440             << "MI Scheduling **********\n");
 441       DEBUG(dbgs() << MF->getName()
 442             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
 443             << "\n  From: " << *I << "    To: ";
 444             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
 445             else dbgs() << "End";
 446             dbgs() << " RegionInstrs: " << NumRegionInstrs
 447             << " Remaining: " << RemainingInstrs << "\n");
 448
 449       // Schedule a region: possibly reorder instructions.
 450       // This invalidates 'RegionEnd' and 'I'.
 451       Scheduler.schedule();
 452
 453       // Close the current region.
 454       Scheduler.exitRegion();
 455
 456       // Scheduling has invalidated the current iterator 'I'. Ask the
 457       // scheduler for the top of it's scheduled region.
 458       RegionEnd = Scheduler.begin();
 459     }
 460     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
 461     Scheduler.finishBlock();
 462     if (Scheduler.isPostRA()) {
 463       // FIXME: Ideally, no further passes should rely on kill flags. However,
 464       // thumb2 size reduction is currently an exception.
 465       Scheduler.fixupKills(MBB);
 466     }
 467   }
 468   Scheduler.finalizeSchedule();
 469 }
 470
 471 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
 472   // unimplemented
 473 }
 474
 475 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 476 void ReadyQueue::dump() {
 477   dbgs() << Name << ": ";
 478   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
 479     dbgs() << Queue[i]->NodeNum << " ";
 480   dbgs() << "\n";
 481 }
 482 #endif
 483
 484 //===----------------------------------------------------------------------===//
 485 // ScheduleDAGMI - Basic machine instruction scheduling. This is
 486 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
 487 // virtual registers.
 488 // ===----------------------------------------------------------------------===/
 489
 490 ScheduleDAGMI::~ScheduleDAGMI() {
 491   DeleteContainerPointers(Mutations);
 492   delete SchedImpl;
 493 }
 494
 495 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
 496   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
 497 }
 498
 499 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
 500   if (SuccSU != &ExitSU) {
 501     // Do not use WillCreateCycle, it assumes SD scheduling.
 502     // If Pred is reachable from Succ, then the edge creates a cycle.
 503     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
 504       return false;
 505     Topo.AddPred(SuccSU, PredDep.getSUnit());
 506   }
 507   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
 508   // Return true regardless of whether a new edge needed to be inserted.
 509   return true;
 510 }
 511
 512 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 513 /// NumPredsLeft reaches zero, release the successor node.
 514 ///
 515 /// FIXME: Adjust SuccSU height based on MinLatency.
 516 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
 517   SUnit *SuccSU = SuccEdge->getSUnit();
 518
 519   if (SuccEdge->isWeak()) {
 520     --SuccSU->WeakPredsLeft;
 521     if (SuccEdge->isCluster())
 522       NextClusterSucc = SuccSU;
 523     return;
 524   }
 525 #ifndef NDEBUG
 526   if (SuccSU->NumPredsLeft == 0) {
 527     dbgs() << "*** Scheduling failed! ***\n";
 528     SuccSU->dump(this);
 529     dbgs() << " has been released too many times!\n";
 530     llvm_unreachable(0);
 531   }
 532 #endif
 533   --SuccSU->NumPredsLeft;
 534   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
 535     SchedImpl->releaseTopNode(SuccSU);
 536 }
 537
 538 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
 539 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
 540   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
 541        I != E; ++I) {
 542     releaseSucc(SU, &*I);
 543   }
 544 }
 545
 546 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
 547 /// NumSuccsLeft reaches zero, release the predecessor node.
 548 ///
 549 /// FIXME: Adjust PredSU height based on MinLatency.
 550 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
 551   SUnit *PredSU = PredEdge->getSUnit();
 552
 553   if (PredEdge->isWeak()) {
 554     --PredSU->WeakSuccsLeft;
 555     if (PredEdge->isCluster())
 556       NextClusterPred = PredSU;
 557     return;
 558   }
 559 #ifndef NDEBUG
 560   if (PredSU->NumSuccsLeft == 0) {
 561     dbgs() << "*** Scheduling failed! ***\n";
 562     PredSU->dump(this);
 563     dbgs() << " has been released too many times!\n";
 564     llvm_unreachable(0);
 565   }
 566 #endif
 567   --PredSU->NumSuccsLeft;
 568   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
 569     SchedImpl->releaseBottomNode(PredSU);
 570 }
 571
 572 /// releasePredecessors - Call releasePred on each of SU's predecessors.
 573 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
 574   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
 575        I != E; ++I) {
 576     releasePred(SU, &*I);
 577   }
 578 }
 579
 580 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 581 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 582 /// the region, including the boundary itself and single-instruction regions
 583 /// that don't get scheduled.
 584 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
 585                                      MachineBasicBlock::iterator begin,
 586                                      MachineBasicBlock::iterator end,
 587                                      unsigned regioninstrs)
 588 {
 589   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
 590
 591   SchedImpl->initPolicy(begin, end, regioninstrs);
 592 }
 593
 594 /// This is normally called from the main scheduler loop but may also be invoked
 595 /// by the scheduling strategy to perform additional code motion.
 596 void ScheduleDAGMI::moveInstruction(
 597   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
 598   // Advance RegionBegin if the first instruction moves down.
 599   if (&*RegionBegin == MI)
 600     ++RegionBegin;
 601
 602   // Update the instruction stream.
 603   BB->splice(InsertPos, BB, MI);
 604
 605   // Update LiveIntervals
 606   if (LIS)
 607     LIS->handleMove(MI, /*UpdateFlags=*/true);
 608
 609   // Recede RegionBegin if an instruction moves above the first.
 610   if (RegionBegin == InsertPos)
 611     RegionBegin = MI;
 612 }
 613
 614 bool ScheduleDAGMI::checkSchedLimit() {
 615 #ifndef NDEBUG
 616   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
 617     CurrentTop = CurrentBottom;
 618     return false;
 619   }
 620   ++NumInstrsScheduled;
 621 #endif
 622   return true;
 623 }
 624
 625 /// Per-region scheduling driver, called back from
 626 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
 627 /// does not consider liveness or register pressure. It is useful for PostRA
 628 /// scheduling and potentially other custom schedulers.
 629 void ScheduleDAGMI::schedule() {
 630   // Build the DAG.
 631   buildSchedGraph(AA);
 632
 633   Topo.InitDAGTopologicalSorting();
 634
 635   postprocessDAG();
 636
 637   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 638   findRootsAndBiasEdges(TopRoots, BotRoots);
 639
 640   // Initialize the strategy before modifying the DAG.
 641   // This may initialize a DFSResult to be used for queue priority.
 642   SchedImpl->initialize(this);
 643
 644   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 645           SUnits[su].dumpAll(this));
 646   if (ViewMISchedDAGs) viewGraph();
 647
 648   // Initialize ready queues now that the DAG and priority data are finalized.
 649   initQueues(TopRoots, BotRoots);
 650
 651   bool IsTopNode = false;
 652   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
 653     assert(!SU->isScheduled && "Node already scheduled");
 654     if (!checkSchedLimit())
 655       break;
 656
 657     MachineInstr *MI = SU->getInstr();
 658     if (IsTopNode) {
 659       assert(SU->isTopReady() && "node still has unscheduled dependencies");
 660       if (&*CurrentTop == MI)
 661         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
 662       else
 663         moveInstruction(MI, CurrentTop);
 664     }
 665     else {
 666       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
 667       MachineBasicBlock::iterator priorII =
 668         priorNonDebug(CurrentBottom, CurrentTop);
 669       if (&*priorII == MI)
 670         CurrentBottom = priorII;
 671       else {
 672         if (&*CurrentTop == MI)
 673           CurrentTop = nextIfDebug(++CurrentTop, priorII);
 674         moveInstruction(MI, CurrentBottom);
 675         CurrentBottom = MI;
 676       }
 677     }
 678     updateQueues(SU, IsTopNode);
 679
 680     // Notify the scheduling strategy after updating the DAG.
 681     SchedImpl->schedNode(SU, IsTopNode);
 682   }
 683   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 684
 685   placeDebugValues();
 686
 687   DEBUG({
 688       unsigned BBNum = begin()->getParent()->getNumber();
 689       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
 690       dumpSchedule();
 691       dbgs() << '\n';
 692     });
 693 }
 694
 695 /// Apply each ScheduleDAGMutation step in order.
 696 void ScheduleDAGMI::postprocessDAG() {
 697   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
 698     Mutations[i]->apply(this);
 699   }
 700 }
 701
 702 void ScheduleDAGMI::
 703 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
 704                       SmallVectorImpl<SUnit*> &BotRoots) {
 705   for (std::vector<SUnit>::iterator
 706          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
 707     SUnit *SU = &(*I);
 708     assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
 709
 710     // Order predecessors so DFSResult follows the critical path.
 711     SU->biasCriticalPath();
 712
 713     // A SUnit is ready to top schedule if it has no predecessors.
 714     if (!I->NumPredsLeft)
 715       TopRoots.push_back(SU);
 716     // A SUnit is ready to bottom schedule if it has no successors.
 717     if (!I->NumSuccsLeft)
 718       BotRoots.push_back(SU);
 719   }
 720   ExitSU.biasCriticalPath();
 721 }
 722
 723 /// Identify DAG roots and setup scheduler queues.
 724 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
 725                                ArrayRef<SUnit*> BotRoots) {
 726   NextClusterSucc = NULL;
 727   NextClusterPred = NULL;
 728
 729   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
 730   //
 731   // Nodes with unreleased weak edges can still be roots.
 732   // Release top roots in forward order.
 733   for (SmallVectorImpl<SUnit*>::const_iterator
 734          I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
 735     SchedImpl->releaseTopNode(*I);
 736   }
 737   // Release bottom roots in reverse order so the higher priority nodes appear
 738   // first. This is more natural and slightly more efficient.
 739   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
 740          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
 741     SchedImpl->releaseBottomNode(*I);
 742   }
 743
 744   releaseSuccessors(&EntrySU);
 745   releasePredecessors(&ExitSU);
 746
 747   SchedImpl->registerRoots();
 748
 749   // Advance past initial DebugValues.
 750   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
 751   CurrentBottom = RegionEnd;
 752 }
 753
 754 /// Update scheduler queues after scheduling an instruction.
 755 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
 756   // Release dependent instructions for scheduling.
 757   if (IsTopNode)
 758     releaseSuccessors(SU);
 759   else
 760     releasePredecessors(SU);
 761
 762   SU->isScheduled = true;
 763 }
 764
 765 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
 766 void ScheduleDAGMI::placeDebugValues() {
 767   // If first instruction was a DBG_VALUE then put it back.
 768   if (FirstDbgValue) {
 769     BB->splice(RegionBegin, BB, FirstDbgValue);
 770     RegionBegin = FirstDbgValue;
 771   }
 772
 773   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
 774          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
 775     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
 776     MachineInstr *DbgValue = P.first;
 777     MachineBasicBlock::iterator OrigPrevMI = P.second;
 778     if (&*RegionBegin == DbgValue)
 779       ++RegionBegin;
 780     BB->splice(++OrigPrevMI, BB, DbgValue);
 781     if (OrigPrevMI == std::prev(RegionEnd))
 782       RegionEnd = DbgValue;
 783   }
 784   DbgValues.clear();
 785   FirstDbgValue = NULL;
 786 }
 787
 788 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 789 void ScheduleDAGMI::dumpSchedule() const {
 790   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
 791     if (SUnit *SU = getSUnit(&(*MI)))
 792       SU->dump(this);
 793     else
 794       dbgs() << "Missing SUnit\n";
 795   }
 796 }
 797 #endif
 798
 799 //===----------------------------------------------------------------------===//
 800 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
 801 // preservation.
 802 //===----------------------------------------------------------------------===//
 803
 804 ScheduleDAGMILive::~ScheduleDAGMILive() {
 805   delete DFSResult;
 806 }
 807
 808 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 809 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 810 /// the region, including the boundary itself and single-instruction regions
 811 /// that don't get scheduled.
 812 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
 813                                 MachineBasicBlock::iterator begin,
 814                                 MachineBasicBlock::iterator end,
 815                                 unsigned regioninstrs)
 816 {
 817   // ScheduleDAGMI initializes SchedImpl's per-region policy.
 818   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
 819
 820   // For convenience remember the end of the liveness region.
 821   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
 822
 823   SUPressureDiffs.clear();
 824
 825   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
 826 }
 827
 828 // Setup the register pressure trackers for the top scheduled top and bottom
 829 // scheduled regions.
 830 void ScheduleDAGMILive::initRegPressure() {
 831   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
 832   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
 833
 834   // Close the RPTracker to finalize live ins.
 835   RPTracker.closeRegion();
 836
 837   DEBUG(RPTracker.dump());
 838
 839   // Initialize the live ins and live outs.
 840   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
 841   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
 842
 843   // Close one end of the tracker so we can call
 844   // getMaxUpward/DownwardPressureDelta before advancing across any
 845   // instructions. This converts currently live regs into live ins/outs.
 846   TopRPTracker.closeTop();
 847   BotRPTracker.closeBottom();
 848
 849   BotRPTracker.initLiveThru(RPTracker);
 850   if (!BotRPTracker.getLiveThru().empty()) {
 851     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
 852     DEBUG(dbgs() << "Live Thru: ";
 853           dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
 854   };
 855
 856   // For each live out vreg reduce the pressure change associated with other
 857   // uses of the same vreg below the live-out reaching def.
 858   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
 859
 860   // Account for liveness generated by the region boundary.
 861   if (LiveRegionEnd != RegionEnd) {
 862     SmallVector<unsigned, 8> LiveUses;
 863     BotRPTracker.recede(&LiveUses);
 864     updatePressureDiffs(LiveUses);
 865   }
 866
 867   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
 868
 869   // Cache the list of excess pressure sets in this region. This will also track
 870   // the max pressure in the scheduled code for these sets.
 871   RegionCriticalPSets.clear();
 872   const std::vector<unsigned> &RegionPressure =
 873     RPTracker.getPressure().MaxSetPressure;
 874   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
 875     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
 876     if (RegionPressure[i] > Limit) {
 877       DEBUG(dbgs() << TRI->getRegPressureSetName(i)
 878             << " Limit " << Limit
 879             << " Actual " << RegionPressure[i] << "\n");
 880       RegionCriticalPSets.push_back(PressureChange(i));
 881     }
 882   }
 883   DEBUG(dbgs() << "Excess PSets: ";
 884         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
 885           dbgs() << TRI->getRegPressureSetName(
 886             RegionCriticalPSets[i].getPSet()) << " ";
 887         dbgs() << "\n");
 888 }
 889
 890 void ScheduleDAGMILive::
 891 updateScheduledPressure(const SUnit *SU,
 892                         const std::vector<unsigned> &NewMaxPressure) {
 893   const PressureDiff &PDiff = getPressureDiff(SU);
 894   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
 895   for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
 896        I != E; ++I) {
 897     if (!I->isValid())
 898       break;
 899     unsigned ID = I->getPSet();
 900     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
 901       ++CritIdx;
 902     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
 903       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
 904           && NewMaxPressure[ID] <= INT16_MAX)
 905         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
 906     }
 907     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
 908     if (NewMaxPressure[ID] >= Limit - 2) {
 909       DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
 910             << NewMaxPressure[ID] << " > " << Limit << "(+ "
 911             << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
 912     }
 913   }
 914 }
 915
 916 /// Update the PressureDiff array for liveness after scheduling this
 917 /// instruction.
 918 void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
 919   for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
 920     /// FIXME: Currently assuming single-use physregs.
 921     unsigned Reg = LiveUses[LUIdx];
 922     DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
 923     if (!TRI->isVirtualRegister(Reg))
 924       continue;
 925
 926     // This may be called before CurrentBottom has been initialized. However,
 927     // BotRPTracker must have a valid position. We want the value live into the
 928     // instruction or live out of the block, so ask for the previous
 929     // instruction's live-out.
 930     const LiveInterval &LI = LIS->getInterval(Reg);
 931     VNInfo *VNI;
 932     MachineBasicBlock::const_iterator I =
 933       nextIfDebug(BotRPTracker.getPos(), BB->end());
 934     if (I == BB->end())
 935       VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
 936     else {
 937       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
 938       VNI = LRQ.valueIn();
 939     }
 940     // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
 941     assert(VNI && "No live value at use.");
 942     for (VReg2UseMap::iterator
 943            UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
 944       SUnit *SU = UI->SU;
 945       DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
 946             << *SU->getInstr());
 947       // If this use comes before the reaching def, it cannot be a last use, so
 948       // descrease its pressure change.
 949       if (!SU->isScheduled && SU != &ExitSU) {
 950         LiveQueryResult LRQ
 951           = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
 952         if (LRQ.valueIn() == VNI)
 953           getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
 954       }
 955     }
 956   }
 957 }
 958
 959 /// schedule - Called back from MachineScheduler::runOnMachineFunction
 960 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
 961 /// only includes instructions that have DAG nodes, not scheduling boundaries.
 962 ///
 963 /// This is a skeletal driver, with all the functionality pushed into helpers,
 964 /// so that it can be easilly extended by experimental schedulers. Generally,
 965 /// implementing MachineSchedStrategy should be sufficient to implement a new
 966 /// scheduling algorithm. However, if a scheduler further subclasses
 967 /// ScheduleDAGMILive then it will want to override this virtual method in order
 968 /// to update any specialized state.
 969 void ScheduleDAGMILive::schedule() {
 970   buildDAGWithRegPressure();
 971
 972   Topo.InitDAGTopologicalSorting();
 973
 974   postprocessDAG();
 975
 976   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 977   findRootsAndBiasEdges(TopRoots, BotRoots);
 978
 979   // Initialize the strategy before modifying the DAG.
 980   // This may initialize a DFSResult to be used for queue priority.
 981   SchedImpl->initialize(this);
 982
 983   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 984           SUnits[su].dumpAll(this));
 985   if (ViewMISchedDAGs) viewGraph();
 986
 987   // Initialize ready queues now that the DAG and priority data are finalized.
 988   initQueues(TopRoots, BotRoots);
 989
 990   if (ShouldTrackPressure) {
 991     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
 992     TopRPTracker.setPos(CurrentTop);
 993   }
 994
 995   bool IsTopNode = false;
 996   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
 997     assert(!SU->isScheduled && "Node already scheduled");
 998     if (!checkSchedLimit())
 999       break;
1000
1001     scheduleMI(SU, IsTopNode);
1002
1003     updateQueues(SU, IsTopNode);
1004
1005     if (DFSResult) {
1006       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1007       if (!ScheduledTrees.test(SubtreeID)) {
1008         ScheduledTrees.set(SubtreeID);
1009         DFSResult->scheduleTree(SubtreeID);
1010         SchedImpl->scheduleTree(SubtreeID);
1011       }
1012     }
1013
1014     // Notify the scheduling strategy after updating the DAG.
1015     SchedImpl->schedNode(SU, IsTopNode);
1016   }
1017   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1018
1019   placeDebugValues();
1020
1021   DEBUG({
1022       unsigned BBNum = begin()->getParent()->getNumber();
1023       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
1024       dumpSchedule();
1025       dbgs() << '\n';
1026     });
1027 }
1028
1029 /// Build the DAG and setup three register pressure trackers.
1030 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1031   if (!ShouldTrackPressure) {
1032     RPTracker.reset();
1033     RegionCriticalPSets.clear();
1034     buildSchedGraph(AA);
1035     return;
1036   }
1037
1038   // Initialize the register pressure tracker used by buildSchedGraph.
1039   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1040                  /*TrackUntiedDefs=*/true);
1041
1042   // Account for liveness generate by the region boundary.
1043   if (LiveRegionEnd != RegionEnd)
1044     RPTracker.recede();
1045
1046   // Build the DAG, and compute current register pressure.
1047   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
1048
1049   // Initialize top/bottom trackers after computing region pressure.
1050   initRegPressure();
1051 }
1052
1053 void ScheduleDAGMILive::computeDFSResult() {
1054   if (!DFSResult)
1055     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1056   DFSResult->clear();
1057   ScheduledTrees.clear();
1058   DFSResult->resize(SUnits.size());
1059   DFSResult->compute(SUnits);
1060   ScheduledTrees.resize(DFSResult->getNumSubtrees());
1061 }
1062
1063 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1064 /// only provides the critical path for single block loops. To handle loops that
1065 /// span blocks, we could use the vreg path latencies provided by
1066 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1067 /// available for use in the scheduler.
1068 ///
1069 /// The cyclic path estimation identifies a def-use pair that crosses the back
1070 /// edge and considers the depth and height of the nodes. For example, consider
1071 /// the following instruction sequence where each instruction has unit latency
1072 /// and defines an epomymous virtual register:
1073 ///
1074 /// a->b(a,c)->c(b)->d(c)->exit
1075 ///
1076 /// The cyclic critical path is a two cycles: b->c->b
1077 /// The acyclic critical path is four cycles: a->b->c->d->exit
1078 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1079 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1080 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1081 /// LiveInDepth = depth(b) = len(a->b) = 1
1082 ///
1083 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1084 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1085 /// CyclicCriticalPath = min(2, 2) = 2
1086 ///
1087 /// This could be relevant to PostRA scheduling, but is currently implemented
1088 /// assuming LiveIntervals.
1089 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1090   // This only applies to single block loop.
1091   if (!BB->isSuccessor(BB))
1092     return 0;
1093
1094   unsigned MaxCyclicLatency = 0;
1095   // Visit each live out vreg def to find def/use pairs that cross iterations.
1096   ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
1097   for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
1098        RI != RE; ++RI) {
1099     unsigned Reg = *RI;
1100     if (!TRI->isVirtualRegister(Reg))
1101         continue;
1102     const LiveInterval &LI = LIS->getInterval(Reg);
1103     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1104     if (!DefVNI)
1105       continue;
1106
1107     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1108     const SUnit *DefSU = getSUnit(DefMI);
1109     if (!DefSU)
1110       continue;
1111
1112     unsigned LiveOutHeight = DefSU->getHeight();
1113     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1114     // Visit all local users of the vreg def.
1115     for (VReg2UseMap::iterator
1116            UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
1117       if (UI->SU == &ExitSU)
1118         continue;
1119
1120       // Only consider uses of the phi.
1121       LiveQueryResult LRQ =
1122         LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
1123       if (!LRQ.valueIn()->isPHIDef())
1124         continue;
1125
1126       // Assume that a path spanning two iterations is a cycle, which could
1127       // overestimate in strange cases. This allows cyclic latency to be
1128       // estimated as the minimum slack of the vreg's depth or height.
1129       unsigned CyclicLatency = 0;
1130       if (LiveOutDepth > UI->SU->getDepth())
1131         CyclicLatency = LiveOutDepth - UI->SU->getDepth();
1132
1133       unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
1134       if (LiveInHeight > LiveOutHeight) {
1135         if (LiveInHeight - LiveOutHeight < CyclicLatency)
1136           CyclicLatency = LiveInHeight - LiveOutHeight;
1137       }
1138       else
1139         CyclicLatency = 0;
1140
1141       DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1142             << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
1143       if (CyclicLatency > MaxCyclicLatency)
1144         MaxCyclicLatency = CyclicLatency;
1145     }
1146   }
1147   DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1148   return MaxCyclicLatency;
1149 }
1150
1151 /// Move an instruction and update register pressure.
1152 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1153   // Move the instruction to its new location in the instruction stream.
1154   MachineInstr *MI = SU->getInstr();
1155
1156   if (IsTopNode) {
1157     assert(SU->isTopReady() && "node still has unscheduled dependencies");
1158     if (&*CurrentTop == MI)
1159       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1160     else {
1161       moveInstruction(MI, CurrentTop);
1162       TopRPTracker.setPos(MI);
1163     }
1164
1165     if (ShouldTrackPressure) {
1166       // Update top scheduled pressure.
1167       TopRPTracker.advance();
1168       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1169       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1170     }
1171   }
1172   else {
1173     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1174     MachineBasicBlock::iterator priorII =
1175       priorNonDebug(CurrentBottom, CurrentTop);
1176     if (&*priorII == MI)
1177       CurrentBottom = priorII;
1178     else {
1179       if (&*CurrentTop == MI) {
1180         CurrentTop = nextIfDebug(++CurrentTop, priorII);
1181         TopRPTracker.setPos(CurrentTop);
1182       }
1183       moveInstruction(MI, CurrentBottom);
1184       CurrentBottom = MI;
1185     }
1186     if (ShouldTrackPressure) {
1187       // Update bottom scheduled pressure.
1188       SmallVector<unsigned, 8> LiveUses;
1189       BotRPTracker.recede(&LiveUses);
1190       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1191       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1192       updatePressureDiffs(LiveUses);
1193     }
1194   }
1195 }
1196
1197 //===----------------------------------------------------------------------===//
1198 // LoadClusterMutation - DAG post-processing to cluster loads.
1199 //===----------------------------------------------------------------------===//
1200
1201 namespace {
1202 /// \brief Post-process the DAG to create cluster edges between neighboring
1203 /// loads.
1204 class LoadClusterMutation : public ScheduleDAGMutation {
1205   struct LoadInfo {
1206     SUnit *SU;
1207     unsigned BaseReg;
1208     unsigned Offset;
1209     LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
1210       : SU(su), BaseReg(reg), Offset(ofs) {}
1211
1212     bool operator<(const LoadInfo &RHS) const {
1213       return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
1214     }
1215   };
1216
1217   const TargetInstrInfo *TII;
1218   const TargetRegisterInfo *TRI;
1219 public:
1220   LoadClusterMutation(const TargetInstrInfo *tii,
1221                       const TargetRegisterInfo *tri)
1222     : TII(tii), TRI(tri) {}
1223
1224   void apply(ScheduleDAGMI *DAG) override;
1225 protected:
1226   void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
1227 };
1228 } // anonymous
1229
1230 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
1231                                                   ScheduleDAGMI *DAG) {
1232   SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
1233   for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
1234     SUnit *SU = Loads[Idx];
1235     unsigned BaseReg;
1236     unsigned Offset;
1237     if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
1238       LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
1239   }
1240   if (LoadRecords.size() < 2)
1241     return;
1242   std::sort(LoadRecords.begin(), LoadRecords.end());
1243   unsigned ClusterLength = 1;
1244   for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
1245     if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
1246       ClusterLength = 1;
1247       continue;
1248     }
1249
1250     SUnit *SUa = LoadRecords[Idx].SU;
1251     SUnit *SUb = LoadRecords[Idx+1].SU;
1252     if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
1253         && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1254
1255       DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
1256             << SUb->NodeNum << ")\n");
1257       // Copy successor edges from SUa to SUb. Interleaving computation
1258       // dependent on SUa can prevent load combining due to register reuse.
1259       // Predecessor edges do not need to be copied from SUb to SUa since nearby
1260       // loads should have effectively the same inputs.
1261       for (SUnit::const_succ_iterator
1262              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
1263         if (SI->getSUnit() == SUb)
1264           continue;
1265         DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
1266         DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
1267       }
1268       ++ClusterLength;
1269     }
1270     else
1271       ClusterLength = 1;
1272   }
1273 }
1274
1275 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
1276 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
1277   // Map DAG NodeNum to store chain ID.
1278   DenseMap<unsigned, unsigned> StoreChainIDs;
1279   // Map each store chain to a set of dependent loads.
1280   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1281   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1282     SUnit *SU = &DAG->SUnits[Idx];
1283     if (!SU->getInstr()->mayLoad())
1284       continue;
1285     unsigned ChainPredID = DAG->SUnits.size();
1286     for (SUnit::const_pred_iterator
1287            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
1288       if (PI->isCtrl()) {
1289         ChainPredID = PI->getSUnit()->NodeNum;
1290         break;
1291       }
1292     }
1293     // Check if this chain-like pred has been seen
1294     // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
1295     unsigned NumChains = StoreChainDependents.size();
1296     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1297       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1298     if (Result.second)
1299       StoreChainDependents.resize(NumChains + 1);
1300     StoreChainDependents[Result.first->second].push_back(SU);
1301   }
1302   // Iterate over the store chains.
1303   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
1304     clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
1305 }
1306
1307 //===----------------------------------------------------------------------===//
1308 // MacroFusion - DAG post-processing to encourage fusion of macro ops.
1309 //===----------------------------------------------------------------------===//
1310
1311 namespace {
1312 /// \brief Post-process the DAG to create cluster edges between instructions
1313 /// that may be fused by the processor into a single operation.
1314 class MacroFusion : public ScheduleDAGMutation {
1315   const TargetInstrInfo *TII;
1316 public:
1317   MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
1318
1319   void apply(ScheduleDAGMI *DAG) override;
1320 };
1321 } // anonymous
1322
1323 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
1324 /// fused operations.
1325 void MacroFusion::apply(ScheduleDAGMI *DAG) {
1326   // For now, assume targets can only fuse with the branch.
1327   MachineInstr *Branch = DAG->ExitSU.getInstr();
1328   if (!Branch)
1329     return;
1330
1331   for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
1332     SUnit *SU = &DAG->SUnits[--Idx];
1333     if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
1334       continue;
1335
1336     // Create a single weak edge from SU to ExitSU. The only effect is to cause
1337     // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
1338     // need to copy predecessor edges from ExitSU to SU, since top-down
1339     // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
1340     // of SU, we could create an artificial edge from the deepest root, but it
1341     // hasn't been needed yet.
1342     bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
1343     (void)Success;
1344     assert(Success && "No DAG nodes should be reachable from ExitSU");
1345
1346     DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
1347     break;
1348   }
1349 }
1350
1351 //===----------------------------------------------------------------------===//
1352 // CopyConstrain - DAG post-processing to encourage copy elimination.
1353 //===----------------------------------------------------------------------===//
1354
1355 namespace {
1356 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
1357 /// the one use that defines the copy's source vreg, most likely an induction
1358 /// variable increment.
1359 class CopyConstrain : public ScheduleDAGMutation {
1360   // Transient state.
1361   SlotIndex RegionBeginIdx;
1362   // RegionEndIdx is the slot index of the last non-debug instruction in the
1363   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1364   SlotIndex RegionEndIdx;
1365 public:
1366   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1367
1368   void apply(ScheduleDAGMI *DAG) override;
1369
1370 protected:
1371   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1372 };
1373 } // anonymous
1374
1375 /// constrainLocalCopy handles two possibilities:
1376 /// 1) Local src:
1377 /// I0:     = dst
1378 /// I1: src = ...
1379 /// I2:     = dst
1380 /// I3: dst = src (copy)
1381 /// (create pred->succ edges I0->I1, I2->I1)
1382 ///
1383 /// 2) Local copy:
1384 /// I0: dst = src (copy)
1385 /// I1:     = dst
1386 /// I2: src = ...
1387 /// I3:     = dst
1388 /// (create pred->succ edges I1->I2, I3->I2)
1389 ///
1390 /// Although the MachineScheduler is currently constrained to single blocks,
1391 /// this algorithm should handle extended blocks. An EBB is a set of
1392 /// contiguously numbered blocks such that the previous block in the EBB is
1393 /// always the single predecessor.
1394 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1395   LiveIntervals *LIS = DAG->getLIS();
1396   MachineInstr *Copy = CopySU->getInstr();
1397
1398   // Check for pure vreg copies.
1399   unsigned SrcReg = Copy->getOperand(1).getReg();
1400   if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
1401     return;
1402
1403   unsigned DstReg = Copy->getOperand(0).getReg();
1404   if (!TargetRegisterInfo::isVirtualRegister(DstReg))
1405     return;
1406
1407   // Check if either the dest or source is local. If it's live across a back
1408   // edge, it's not local. Note that if both vregs are live across the back
1409   // edge, we cannot successfully contrain the copy without cyclic scheduling.
1410   unsigned LocalReg = DstReg;
1411   unsigned GlobalReg = SrcReg;
1412   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1413   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1414     LocalReg = SrcReg;
1415     GlobalReg = DstReg;
1416     LocalLI = &LIS->getInterval(LocalReg);
1417     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1418       return;
1419   }
1420   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1421
1422   // Find the global segment after the start of the local LI.
1423   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1424   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1425   // local live range. We could create edges from other global uses to the local
1426   // start, but the coalescer should have already eliminated these cases, so
1427   // don't bother dealing with it.
1428   if (GlobalSegment == GlobalLI->end())
1429     return;
1430
1431   // If GlobalSegment is killed at the LocalLI->start, the call to find()
1432   // returned the next global segment. But if GlobalSegment overlaps with
1433   // LocalLI->start, then advance to the next segement. If a hole in GlobalLI
1434   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1435   if (GlobalSegment->contains(LocalLI->beginIndex()))
1436     ++GlobalSegment;
1437
1438   if (GlobalSegment == GlobalLI->end())
1439     return;
1440
1441   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1442   if (GlobalSegment != GlobalLI->begin()) {
1443     // Two address defs have no hole.
1444     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1445                                GlobalSegment->start)) {
1446       return;
1447     }
1448     // If the prior global segment may be defined by the same two-address
1449     // instruction that also defines LocalLI, then can't make a hole here.
1450     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1451                                LocalLI->beginIndex())) {
1452       return;
1453     }
1454     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1455     // it would be a disconnected component in the live range.
1456     assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1457            "Disconnected LRG within the scheduling region.");
1458   }
1459   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1460   if (!GlobalDef)
1461     return;
1462
1463   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1464   if (!GlobalSU)
1465     return;
1466
1467   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1468   // constraining the uses of the last local def to precede GlobalDef.
1469   SmallVector<SUnit*,8> LocalUses;
1470   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1471   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1472   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1473   for (SUnit::const_succ_iterator
1474          I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
1475        I != E; ++I) {
1476     if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
1477       continue;
1478     if (I->getSUnit() == GlobalSU)
1479       continue;
1480     if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
1481       return;
1482     LocalUses.push_back(I->getSUnit());
1483   }
1484   // Open the top of the GlobalLI hole by constraining any earlier global uses
1485   // to precede the start of LocalLI.
1486   SmallVector<SUnit*,8> GlobalUses;
1487   MachineInstr *FirstLocalDef =
1488     LIS->getInstructionFromIndex(LocalLI->beginIndex());
1489   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1490   for (SUnit::const_pred_iterator
1491          I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
1492     if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
1493       continue;
1494     if (I->getSUnit() == FirstLocalSU)
1495       continue;
1496     if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
1497       return;
1498     GlobalUses.push_back(I->getSUnit());
1499   }
1500   DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1501   // Add the weak edges.
1502   for (SmallVectorImpl<SUnit*>::const_iterator
1503          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1504     DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
1505           << GlobalSU->NodeNum << ")\n");
1506     DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1507   }
1508   for (SmallVectorImpl<SUnit*>::const_iterator
1509          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1510     DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
1511           << FirstLocalSU->NodeNum << ")\n");
1512     DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1513   }
1514 }
1515
1516 /// \brief Callback from DAG postProcessing to create weak edges to encourage
1517 /// copy elimination.
1518 void CopyConstrain::apply(ScheduleDAGMI *DAG) {
1519   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1520
1521   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1522   if (FirstPos == DAG->end())
1523     return;
1524   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
1525   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1526     &*priorNonDebug(DAG->end(), DAG->begin()));
1527
1528   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1529     SUnit *SU = &DAG->SUnits[Idx];
1530     if (!SU->getInstr()->isCopy())
1531       continue;
1532
1533     constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
1534   }
1535 }
1536
1537 //===----------------------------------------------------------------------===//
1538 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1539 // and possibly other custom schedulers.
1540 //===----------------------------------------------------------------------===//
1541
1542 static const unsigned InvalidCycle = ~0U;
1543
1544 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1545
1546 void SchedBoundary::reset() {
1547   // A new HazardRec is created for each DAG and owned by SchedBoundary.
1548   // Destroying and reconstructing it is very expensive though. So keep
1549   // invalid, placeholder HazardRecs.
1550   if (HazardRec && HazardRec->isEnabled()) {
1551     delete HazardRec;
1552     HazardRec = 0;
1553   }
1554   Available.clear();
1555   Pending.clear();
1556   CheckPending = false;
1557   NextSUs.clear();
1558   CurrCycle = 0;
1559   CurrMOps = 0;
1560   MinReadyCycle = UINT_MAX;
1561   ExpectedLatency = 0;
1562   DependentLatency = 0;
1563   RetiredMOps = 0;
1564   MaxExecutedResCount = 0;
1565   ZoneCritResIdx = 0;
1566   IsResourceLimited = false;
1567   ReservedCycles.clear();
1568 #ifndef NDEBUG
1569   // Track the maximum number of stall cycles that could arise either from the
1570   // latency of a DAG edge or the number of cycles that a processor resource is
1571   // reserved (SchedBoundary::ReservedCycles).
1572   MaxObservedLatency = 0;
1573 #endif
1574   // Reserve a zero-count for invalid CritResIdx.
1575   ExecutedResCounts.resize(1);
1576   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1577 }
1578
1579 void SchedRemainder::
1580 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1581   reset();
1582   if (!SchedModel->hasInstrSchedModel())
1583     return;
1584   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1585   for (std::vector<SUnit>::iterator
1586          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1587     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1588     RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
1589       * SchedModel->getMicroOpFactor();
1590     for (TargetSchedModel::ProcResIter
1591            PI = SchedModel->getWriteProcResBegin(SC),
1592            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1593       unsigned PIdx = PI->ProcResourceIdx;
1594       unsigned Factor = SchedModel->getResourceFactor(PIdx);
1595       RemainingCounts[PIdx] += (Factor * PI->Cycles);
1596     }
1597   }
1598 }
1599
1600 void SchedBoundary::
1601 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1602   reset();
1603   DAG = dag;
1604   SchedModel = smodel;
1605   Rem = rem;
1606   if (SchedModel->hasInstrSchedModel()) {
1607     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1608     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1609   }
1610 }
1611
1612 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1613 /// these "soft stalls" differently than the hard stall cycles based on CPU
1614 /// resources and computed by checkHazard(). A fully in-order model
1615 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1616 /// available for scheduling until they are ready. However, a weaker in-order
1617 /// model may use this for heuristics. For example, if a processor has in-order
1618 /// behavior when reading certain resources, this may come into play.
1619 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1620   if (!SU->isUnbuffered)
1621     return 0;
1622
1623   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1624   if (ReadyCycle > CurrCycle)
1625     return ReadyCycle - CurrCycle;
1626   return 0;
1627 }
1628
1629 /// Compute the next cycle at which the given processor resource can be
1630 /// scheduled.
1631 unsigned SchedBoundary::
1632 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1633   unsigned NextUnreserved = ReservedCycles[PIdx];
1634   // If this resource has never been used, always return cycle zero.
1635   if (NextUnreserved == InvalidCycle)
1636     return 0;
1637   // For bottom-up scheduling add the cycles needed for the current operation.
1638   if (!isTop())
1639     NextUnreserved += Cycles;
1640   return NextUnreserved;
1641 }
1642
1643 /// Does this SU have a hazard within the current instruction group.
1644 ///
1645 /// The scheduler supports two modes of hazard recognition. The first is the
1646 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1647 /// supports highly complicated in-order reservation tables
1648 /// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
1649 ///
1650 /// The second is a streamlined mechanism that checks for hazards based on
1651 /// simple counters that the scheduler itself maintains. It explicitly checks
1652 /// for instruction dispatch limitations, including the number of micro-ops that
1653 /// can dispatch per cycle.
1654 ///
1655 /// TODO: Also check whether the SU must start a new group.
1656 bool SchedBoundary::checkHazard(SUnit *SU) {
1657   if (HazardRec->isEnabled()
1658       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1659     return true;
1660   }
1661   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1662   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1663     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1664           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1665     return true;
1666   }
1667   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1668     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1669     for (TargetSchedModel::ProcResIter
1670            PI = SchedModel->getWriteProcResBegin(SC),
1671            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1672       if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
1673         return true;
1674     }
1675   }
1676   return false;
1677 }
1678
1679 // Find the unscheduled node in ReadySUs with the highest latency.
1680 unsigned SchedBoundary::
1681 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1682   SUnit *LateSU = 0;
1683   unsigned RemLatency = 0;
1684   for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
1685        I != E; ++I) {
1686     unsigned L = getUnscheduledLatency(*I);
1687     if (L > RemLatency) {
1688       RemLatency = L;
1689       LateSU = *I;
1690     }
1691   }
1692   if (LateSU) {
1693     DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1694           << LateSU->NodeNum << ") " << RemLatency << "c\n");
1695   }
1696   return RemLatency;
1697 }
1698
1699 // Count resources in this zone and the remaining unscheduled
1700 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
1701 // resource index, or zero if the zone is issue limited.
1702 unsigned SchedBoundary::
1703 getOtherResourceCount(unsigned &OtherCritIdx) {
1704   OtherCritIdx = 0;
1705   if (!SchedModel->hasInstrSchedModel())
1706     return 0;
1707
1708   unsigned OtherCritCount = Rem->RemIssueCount
1709     + (RetiredMOps * SchedModel->getMicroOpFactor());
1710   DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
1711         << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
1712   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
1713        PIdx != PEnd; ++PIdx) {
1714     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
1715     if (OtherCount > OtherCritCount) {
1716       OtherCritCount = OtherCount;
1717       OtherCritIdx = PIdx;
1718     }
1719   }
1720   if (OtherCritIdx) {
1721     DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
1722           << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
1723           << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
1724   }
1725   return OtherCritCount;
1726 }
1727
1728 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
1729   if (ReadyCycle < MinReadyCycle)
1730     MinReadyCycle = ReadyCycle;
1731
1732   // Check for interlocks first. For the purpose of other heuristics, an
1733   // instruction that cannot issue appears as if it's not in the ReadyQueue.
1734   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
1735   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
1736     Pending.push(SU);
1737   else
1738     Available.push(SU);
1739
1740   // Record this node as an immediate dependent of the scheduled node.
1741   NextSUs.insert(SU);
1742 }
1743
1744 void SchedBoundary::releaseTopNode(SUnit *SU) {
1745   if (SU->isScheduled)
1746     return;
1747
1748   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1749        I != E; ++I) {
1750     if (I->isWeak())
1751       continue;
1752     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1753     unsigned Latency = I->getLatency();
1754 #ifndef NDEBUG
1755     MaxObservedLatency = std::max(Latency, MaxObservedLatency);
1756 #endif
1757     if (SU->TopReadyCycle < PredReadyCycle + Latency)
1758       SU->TopReadyCycle = PredReadyCycle + Latency;
1759   }
1760   releaseNode(SU, SU->TopReadyCycle);
1761 }
1762
1763 void SchedBoundary::releaseBottomNode(SUnit *SU) {
1764   if (SU->isScheduled)
1765     return;
1766
1767   assert(SU->getInstr() && "Scheduled SUnit must have instr");
1768
1769   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1770        I != E; ++I) {
1771     if (I->isWeak())
1772       continue;
1773     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1774     unsigned Latency = I->getLatency();
1775 #ifndef NDEBUG
1776     MaxObservedLatency = std::max(Latency, MaxObservedLatency);
1777 #endif
1778     if (SU->BotReadyCycle < SuccReadyCycle + Latency)
1779       SU->BotReadyCycle = SuccReadyCycle + Latency;
1780   }
1781   releaseNode(SU, SU->BotReadyCycle);
1782 }
1783
1784 /// Move the boundary of scheduled code by one cycle.
1785 void SchedBoundary::bumpCycle(unsigned NextCycle) {
1786   if (SchedModel->getMicroOpBufferSize() == 0) {
1787     assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1788     if (MinReadyCycle > NextCycle)
1789       NextCycle = MinReadyCycle;
1790   }
1791   // Update the current micro-ops, which will issue in the next cycle.
1792   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
1793   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
1794
1795   // Decrement DependentLatency based on the next cycle.
1796   if ((NextCycle - CurrCycle) > DependentLatency)
1797     DependentLatency = 0;
1798   else
1799     DependentLatency -= (NextCycle - CurrCycle);
1800
1801   if (!HazardRec->isEnabled()) {
1802     // Bypass HazardRec virtual calls.
1803     CurrCycle = NextCycle;
1804   }
1805   else {
1806     // Bypass getHazardType calls in case of long latency.
1807     for (; CurrCycle != NextCycle; ++CurrCycle) {
1808       if (isTop())
1809         HazardRec->AdvanceCycle();
1810       else
1811         HazardRec->RecedeCycle();
1812     }
1813   }
1814   CheckPending = true;
1815   unsigned LFactor = SchedModel->getLatencyFactor();
1816   IsResourceLimited =
1817     (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
1818     > (int)LFactor;
1819
1820   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
1821 }
1822
1823 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
1824   ExecutedResCounts[PIdx] += Count;
1825   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
1826     MaxExecutedResCount = ExecutedResCounts[PIdx];
1827 }
1828
1829 /// Add the given processor resource to this scheduled zone.
1830 ///
1831 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
1832 /// during which this resource is consumed.
1833 ///
1834 /// \return the next cycle at which the instruction may execute without
1835 /// oversubscribing resources.
1836 unsigned SchedBoundary::
1837 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
1838   unsigned Factor = SchedModel->getResourceFactor(PIdx);
1839   unsigned Count = Factor * Cycles;
1840   DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
1841         << " +" << Cycles << "x" << Factor << "u\n");
1842
1843   // Update Executed resources counts.
1844   incExecutedResources(PIdx, Count);
1845   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1846   Rem->RemainingCounts[PIdx] -= Count;
1847
1848   // Check if this resource exceeds the current critical resource. If so, it
1849   // becomes the critical resource.
1850   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
1851     ZoneCritResIdx = PIdx;
1852     DEBUG(dbgs() << "  *** Critical resource "
1853           << SchedModel->getResourceName(PIdx) << ": "
1854           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
1855   }
1856   // For reserved resources, record the highest cycle using the resource.
1857   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
1858   if (NextAvailable > CurrCycle) {
1859     DEBUG(dbgs() << "  Resource conflict: "
1860           << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
1861           << NextAvailable << "\n");
1862   }
1863   return NextAvailable;
1864 }
1865
1866 /// Move the boundary of scheduled code by one SUnit.
1867 void SchedBoundary::bumpNode(SUnit *SU) {
1868   // Update the reservation table.
1869   if (HazardRec->isEnabled()) {
1870     if (!isTop() && SU->isCall) {
1871       // Calls are scheduled with their preceding instructions. For bottom-up
1872       // scheduling, clear the pipeline state before emitting.
1873       HazardRec->Reset();
1874     }
1875     HazardRec->EmitInstruction(SU);
1876   }
1877   // checkHazard should prevent scheduling multiple instructions per cycle that
1878   // exceed the issue width.
1879   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1880   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
1881   assert(
1882       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
1883       "Cannot schedule this instruction's MicroOps in the current cycle.");
1884
1885   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1886   DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
1887
1888   unsigned NextCycle = CurrCycle;
1889   switch (SchedModel->getMicroOpBufferSize()) {
1890   case 0:
1891     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
1892     break;
1893   case 1:
1894     if (ReadyCycle > NextCycle) {
1895       NextCycle = ReadyCycle;
1896       DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
1897     }
1898     break;
1899   default:
1900     // We don't currently model the OOO reorder buffer, so consider all
1901     // scheduled MOps to be "retired". We do loosely model in-order resource
1902     // latency. If this instruction uses an in-order resource, account for any
1903     // likely stall cycles.
1904     if (SU->isUnbuffered && ReadyCycle > NextCycle)
1905       NextCycle = ReadyCycle;
1906     break;
1907   }
1908   RetiredMOps += IncMOps;
1909
1910   // Update resource counts and critical resource.
1911   if (SchedModel->hasInstrSchedModel()) {
1912     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
1913     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
1914     Rem->RemIssueCount -= DecRemIssue;
1915     if (ZoneCritResIdx) {
1916       // Scale scheduled micro-ops for comparing with the critical resource.
1917       unsigned ScaledMOps =
1918         RetiredMOps * SchedModel->getMicroOpFactor();
1919
1920       // If scaled micro-ops are now more than the previous critical resource by
1921       // a full cycle, then micro-ops issue becomes critical.
1922       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
1923           >= (int)SchedModel->getLatencyFactor()) {
1924         ZoneCritResIdx = 0;
1925         DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
1926               << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
1927       }
1928     }
1929     for (TargetSchedModel::ProcResIter
1930            PI = SchedModel->getWriteProcResBegin(SC),
1931            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1932       unsigned RCycle =
1933         countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
1934       if (RCycle > NextCycle)
1935         NextCycle = RCycle;
1936     }
1937     if (SU->hasReservedResource) {
1938       // For reserved resources, record the highest cycle using the resource.
1939       // For top-down scheduling, this is the cycle in which we schedule this
1940       // instruction plus the number of cycles the operations reserves the
1941       // resource. For bottom-up is it simply the instruction's cycle.
1942       for (TargetSchedModel::ProcResIter
1943              PI = SchedModel->getWriteProcResBegin(SC),
1944              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1945         unsigned PIdx = PI->ProcResourceIdx;
1946         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
1947           ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
1948 #ifndef NDEBUG
1949           MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency);
1950 #endif
1951         }
1952       }
1953     }
1954   }
1955   // Update ExpectedLatency and DependentLatency.
1956   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
1957   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
1958   if (SU->getDepth() > TopLatency) {
1959     TopLatency = SU->getDepth();
1960     DEBUG(dbgs() << "  " << Available.getName()
1961           << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
1962   }
1963   if (SU->getHeight() > BotLatency) {
1964     BotLatency = SU->getHeight();
1965     DEBUG(dbgs() << "  " << Available.getName()
1966           << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
1967   }
1968   // If we stall for any reason, bump the cycle.
1969   if (NextCycle > CurrCycle) {
1970     bumpCycle(NextCycle);
1971   }
1972   else {
1973     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
1974     // resource limited. If a stall occurred, bumpCycle does this.
1975     unsigned LFactor = SchedModel->getLatencyFactor();
1976     IsResourceLimited =
1977       (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
1978       > (int)LFactor;
1979   }
1980   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
1981   // resets CurrMOps. Loop to handle instructions with more MOps than issue in
1982   // one cycle.  Since we commonly reach the max MOps here, opportunistically
1983   // bump the cycle to avoid uselessly checking everything in the readyQ.
1984   CurrMOps += IncMOps;
1985   while (CurrMOps >= SchedModel->getIssueWidth()) {
1986     DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
1987           << " at cycle " << CurrCycle << '\n');
1988     bumpCycle(++NextCycle);
1989   }
1990   DEBUG(dumpScheduledState());
1991 }
1992
1993 /// Release pending ready nodes in to the available queue. This makes them
1994 /// visible to heuristics.
1995 void SchedBoundary::releasePending() {
1996   // If the available queue is empty, it is safe to reset MinReadyCycle.
1997   if (Available.empty())
1998     MinReadyCycle = UINT_MAX;
1999
2000   // Check to see if any of the pending instructions are ready to issue.  If
2001   // so, add them to the available queue.
2002   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2003   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2004     SUnit *SU = *(Pending.begin()+i);
2005     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2006
2007     if (ReadyCycle < MinReadyCycle)
2008       MinReadyCycle = ReadyCycle;
2009
2010     if (!IsBuffered && ReadyCycle > CurrCycle)
2011       continue;
2012
2013     if (checkHazard(SU))
2014       continue;
2015
2016     Available.push(SU);
2017     Pending.remove(Pending.begin()+i);
2018     --i; --e;
2019   }
2020   DEBUG(if (!Pending.empty()) Pending.dump());
2021   CheckPending = false;
2022 }
2023
2024 /// Remove SU from the ready set for this boundary.
2025 void SchedBoundary::removeReady(SUnit *SU) {
2026   if (Available.isInQueue(SU))
2027     Available.remove(Available.find(SU));
2028   else {
2029     assert(Pending.isInQueue(SU) && "bad ready count");
2030     Pending.remove(Pending.find(SU));
2031   }
2032 }
2033
2034 /// If this queue only has one ready candidate, return it. As a side effect,
2035 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2036 /// one node is ready. If multiple instructions are ready, return NULL.
2037 SUnit *SchedBoundary::pickOnlyChoice() {
2038   if (CheckPending)
2039     releasePending();
2040
2041   if (CurrMOps > 0) {
2042     // Defer any ready instrs that now have a hazard.
2043     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2044       if (checkHazard(*I)) {
2045         Pending.push(*I);
2046         I = Available.remove(I);
2047         continue;
2048       }
2049       ++I;
2050     }
2051   }
2052   for (unsigned i = 0; Available.empty(); ++i) {
2053     assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
2054            "permanent hazard"); (void)i;
2055     bumpCycle(CurrCycle + 1);
2056     releasePending();
2057   }
2058   if (Available.size() == 1)
2059     return *Available.begin();
2060   return NULL;
2061 }
2062
2063 #ifndef NDEBUG
2064 // This is useful information to dump after bumpNode.
2065 // Note that the Queue contents are more useful before pickNodeFromQueue.
2066 void SchedBoundary::dumpScheduledState() {
2067   unsigned ResFactor;
2068   unsigned ResCount;
2069   if (ZoneCritResIdx) {
2070     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2071     ResCount = getResourceCount(ZoneCritResIdx);
2072   }
2073   else {
2074     ResFactor = SchedModel->getMicroOpFactor();
2075     ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
2076   }
2077   unsigned LFactor = SchedModel->getLatencyFactor();
2078   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2079          << "  Retired: " << RetiredMOps;
2080   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2081   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2082          << ResCount / ResFactor << " "
2083          << SchedModel->getResourceName(ZoneCritResIdx)
2084          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2085          << (IsResourceLimited ? "  - Resource" : "  - Latency")
2086          << " limited.\n";
2087 }
2088 #endif
2089
2090 //===----------------------------------------------------------------------===//
2091 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2092 //===----------------------------------------------------------------------===//
2093
2094 namespace {
2095 /// Base class for GenericScheduler. This class maintains information about
2096 /// scheduling candidates based on TargetSchedModel making it easy to implement
2097 /// heuristics for either preRA or postRA scheduling.
2098 class GenericSchedulerBase : public MachineSchedStrategy {
2099 public:
2100   /// Represent the type of SchedCandidate found within a single queue.
2101   /// pickNodeBidirectional depends on these listed by decreasing priority.
2102   enum CandReason {
2103     NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax,
2104     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
2105     TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
2106
2107 #ifndef NDEBUG
2108   static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
2109 #endif
2110
2111   /// Policy for scheduling the next instruction in the candidate's zone.
2112   struct CandPolicy {
2113     bool ReduceLatency;
2114     unsigned ReduceResIdx;
2115     unsigned DemandResIdx;
2116
2117     CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
2118   };
2119
2120   /// Status of an instruction's critical resource consumption.
2121   struct SchedResourceDelta {
2122     // Count critical resources in the scheduled region required by SU.
2123     unsigned CritResources;
2124
2125     // Count critical resources from another region consumed by SU.
2126     unsigned DemandedResources;
2127
2128     SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
2129
2130     bool operator==(const SchedResourceDelta &RHS) const {
2131       return CritResources == RHS.CritResources
2132         && DemandedResources == RHS.DemandedResources;
2133     }
2134     bool operator!=(const SchedResourceDelta &RHS) const {
2135       return !operator==(RHS);
2136     }
2137   };
2138
2139   /// Store the state used by GenericScheduler heuristics, required for the
2140   /// lifetime of one invocation of pickNode().
2141   struct SchedCandidate {
2142     CandPolicy Policy;
2143
2144     // The best SUnit candidate.
2145     SUnit *SU;
2146
2147     // The reason for this candidate.
2148     CandReason Reason;
2149
2150     // Set of reasons that apply to multiple candidates.
2151     uint32_t RepeatReasonSet;
2152
2153     // Register pressure values for the best candidate.
2154     RegPressureDelta RPDelta;
2155
2156     // Critical resource consumption of the best candidate.
2157     SchedResourceDelta ResDelta;
2158
2159     SchedCandidate(const CandPolicy &policy)
2160       : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
2161
2162     bool isValid() const { return SU; }
2163
2164     // Copy the status of another candidate without changing policy.
2165     void setBest(SchedCandidate &Best) {
2166       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
2167       SU = Best.SU;
2168       Reason = Best.Reason;
2169       RPDelta = Best.RPDelta;
2170       ResDelta = Best.ResDelta;
2171     }
2172
2173     bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
2174     void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
2175
2176     void initResourceDelta(const ScheduleDAGMI *DAG,
2177                            const TargetSchedModel *SchedModel);
2178   };
2179
2180 protected:
2181   const MachineSchedContext *Context;
2182   const TargetSchedModel *SchedModel;
2183   const TargetRegisterInfo *TRI;
2184
2185   SchedRemainder Rem;
2186 protected:
2187   GenericSchedulerBase(const MachineSchedContext *C):
2188     Context(C), SchedModel(0), TRI(0) {}
2189
2190   void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
2191                  SchedBoundary *OtherZone);
2192
2193 #ifndef NDEBUG
2194   void traceCandidate(const SchedCandidate &Cand);
2195 #endif
2196 };
2197 } // namespace
2198
2199 void GenericSchedulerBase::SchedCandidate::
2200 initResourceDelta(const ScheduleDAGMI *DAG,
2201                   const TargetSchedModel *SchedModel) {
2202   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2203     return;
2204
2205   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2206   for (TargetSchedModel::ProcResIter
2207          PI = SchedModel->getWriteProcResBegin(SC),
2208          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2209     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2210       ResDelta.CritResources += PI->Cycles;
2211     if (PI->ProcResourceIdx == Policy.DemandResIdx)
2212       ResDelta.DemandedResources += PI->Cycles;
2213   }
2214 }
2215
2216 /// Set the CandPolicy given a scheduling zone given the current resources and
2217 /// latencies inside and outside the zone.
2218 void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
2219                                      bool IsPostRA,
2220                                      SchedBoundary &CurrZone,
2221                                      SchedBoundary *OtherZone) {
2222   // Apply preemptive heuristics based on the the total latency and resources
2223   // inside and outside this zone. Potential stalls should be considered before
2224   // following this policy.
2225
2226   // Compute remaining latency. We need this both to determine whether the
2227   // overall schedule has become latency-limited and whether the instructions
2228   // outside this zone are resource or latency limited.
2229   //
2230   // The "dependent" latency is updated incrementally during scheduling as the
2231   // max height/depth of scheduled nodes minus the cycles since it was
2232   // scheduled:
2233   //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2234   //
2235   // The "independent" latency is the max ready queue depth:
2236   //   ILat = max N.depth for N in Available|Pending
2237   //
2238   // RemainingLatency is the greater of independent and dependent latency.
2239   unsigned RemLatency = CurrZone.getDependentLatency();
2240   RemLatency = std::max(RemLatency,
2241                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
2242   RemLatency = std::max(RemLatency,
2243                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2244
2245   // Compute the critical resource outside the zone.
2246   unsigned OtherCritIdx = 0;
2247   unsigned OtherCount =
2248     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2249
2250   bool OtherResLimited = false;
2251   if (SchedModel->hasInstrSchedModel()) {
2252     unsigned LFactor = SchedModel->getLatencyFactor();
2253     OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
2254   }
2255   // Schedule aggressively for latency in PostRA mode. We don't check for
2256   // acyclic latency during PostRA, and highly out-of-order processors will
2257   // skip PostRA scheduling.
2258   if (!OtherResLimited) {
2259     if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2260       Policy.ReduceLatency |= true;
2261       DEBUG(dbgs() << "  " << CurrZone.Available.getName()
2262             << " RemainingLatency " << RemLatency << " + "
2263             << CurrZone.getCurrCycle() << "c > CritPath "
2264             << Rem.CriticalPath << "\n");
2265     }
2266   }
2267   // If the same resource is limiting inside and outside the zone, do nothing.
2268   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2269     return;
2270
2271   DEBUG(
2272     if (CurrZone.isResourceLimited()) {
2273       dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
2274              << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2275              << "\n";
2276     }
2277     if (OtherResLimited)
2278       dbgs() << "  RemainingLimit: "
2279              << SchedModel->getResourceName(OtherCritIdx) << "\n";
2280     if (!CurrZone.isResourceLimited() && !OtherResLimited)
2281       dbgs() << "  Latency limited both directions.\n");
2282
2283   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2284     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2285
2286   if (OtherResLimited)
2287     Policy.DemandResIdx = OtherCritIdx;
2288 }
2289
2290 #ifndef NDEBUG
2291 const char *GenericSchedulerBase::getReasonStr(
2292   GenericSchedulerBase::CandReason Reason) {
2293   switch (Reason) {
2294   case NoCand:         return "NOCAND    ";
2295   case PhysRegCopy:    return "PREG-COPY";
2296   case RegExcess:      return "REG-EXCESS";
2297   case RegCritical:    return "REG-CRIT  ";
2298   case Stall:          return "STALL     ";
2299   case Cluster:        return "CLUSTER   ";
2300   case Weak:           return "WEAK      ";
2301   case RegMax:         return "REG-MAX   ";
2302   case ResourceReduce: return "RES-REDUCE";
2303   case ResourceDemand: return "RES-DEMAND";
2304   case TopDepthReduce: return "TOP-DEPTH ";
2305   case TopPathReduce:  return "TOP-PATH  ";
2306   case BotHeightReduce:return "BOT-HEIGHT";
2307   case BotPathReduce:  return "BOT-PATH  ";
2308   case NextDefUse:     return "DEF-USE   ";
2309   case NodeOrder:      return "ORDER     ";
2310   };
2311   llvm_unreachable("Unknown reason!");
2312 }
2313
2314 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2315   PressureChange P;
2316   unsigned ResIdx = 0;
2317   unsigned Latency = 0;
2318   switch (Cand.Reason) {
2319   default:
2320     break;
2321   case RegExcess:
2322     P = Cand.RPDelta.Excess;
2323     break;
2324   case RegCritical:
2325     P = Cand.RPDelta.CriticalMax;
2326     break;
2327   case RegMax:
2328     P = Cand.RPDelta.CurrentMax;
2329     break;
2330   case ResourceReduce:
2331     ResIdx = Cand.Policy.ReduceResIdx;
2332     break;
2333   case ResourceDemand:
2334     ResIdx = Cand.Policy.DemandResIdx;
2335     break;
2336   case TopDepthReduce:
2337     Latency = Cand.SU->getDepth();
2338     break;
2339   case TopPathReduce:
2340     Latency = Cand.SU->getHeight();
2341     break;
2342   case BotHeightReduce:
2343     Latency = Cand.SU->getHeight();
2344     break;
2345   case BotPathReduce:
2346     Latency = Cand.SU->getDepth();
2347     break;
2348   }
2349   dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2350   if (P.isValid())
2351     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2352            << ":" << P.getUnitInc() << " ";
2353   else
2354     dbgs() << "      ";
2355   if (ResIdx)
2356     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2357   else
2358     dbgs() << "         ";
2359   if (Latency)
2360     dbgs() << " " << Latency << " cycles ";
2361   else
2362     dbgs() << "          ";
2363   dbgs() << '\n';
2364 }
2365 #endif
2366
2367 /// Return true if this heuristic determines order.
2368 static bool tryLess(int TryVal, int CandVal,
2369                     GenericSchedulerBase::SchedCandidate &TryCand,
2370                     GenericSchedulerBase::SchedCandidate &Cand,
2371                     GenericSchedulerBase::CandReason Reason) {
2372   if (TryVal < CandVal) {
2373     TryCand.Reason = Reason;
2374     return true;
2375   }
2376   if (TryVal > CandVal) {
2377     if (Cand.Reason > Reason)
2378       Cand.Reason = Reason;
2379     return true;
2380   }
2381   Cand.setRepeat(Reason);
2382   return false;
2383 }
2384
2385 static bool tryGreater(int TryVal, int CandVal,
2386                        GenericSchedulerBase::SchedCandidate &TryCand,
2387                        GenericSchedulerBase::SchedCandidate &Cand,
2388                        GenericSchedulerBase::CandReason Reason) {
2389   if (TryVal > CandVal) {
2390     TryCand.Reason = Reason;
2391     return true;
2392   }
2393   if (TryVal < CandVal) {
2394     if (Cand.Reason > Reason)
2395       Cand.Reason = Reason;
2396     return true;
2397   }
2398   Cand.setRepeat(Reason);
2399   return false;
2400 }
2401
2402 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2403                        GenericSchedulerBase::SchedCandidate &Cand,
2404                        SchedBoundary &Zone) {
2405   if (Zone.isTop()) {
2406     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2407       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2408                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2409         return true;
2410     }
2411     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2412                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2413       return true;
2414   }
2415   else {
2416     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2417       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2418                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2419         return true;
2420     }
2421     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2422                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2423       return true;
2424   }
2425   return false;
2426 }
2427
2428 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
2429                       bool IsTop) {
2430   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2431         << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
2432 }
2433
2434 namespace {
2435 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance
2436 /// the schedule.
2437 class GenericScheduler : public GenericSchedulerBase {
2438   ScheduleDAGMILive *DAG;
2439
2440   // State of the top and bottom scheduled instruction boundaries.
2441   SchedBoundary Top;
2442   SchedBoundary Bot;
2443
2444   MachineSchedPolicy RegionPolicy;
2445 public:
2446   GenericScheduler(const MachineSchedContext *C):
2447     GenericSchedulerBase(C), DAG(0), Top(SchedBoundary::TopQID, "TopQ"),
2448     Bot(SchedBoundary::BotQID, "BotQ") {}
2449
2450   void initPolicy(MachineBasicBlock::iterator Begin,
2451                   MachineBasicBlock::iterator End,
2452                   unsigned NumRegionInstrs) override;
2453
2454   bool shouldTrackPressure() const override {
2455     return RegionPolicy.ShouldTrackPressure;
2456   }
2457
2458   void initialize(ScheduleDAGMI *dag) override;
2459
2460   SUnit *pickNode(bool &IsTopNode) override;
2461
2462   void schedNode(SUnit *SU, bool IsTopNode) override;
2463
2464   void releaseTopNode(SUnit *SU) override {
2465     Top.releaseTopNode(SU);
2466   }
2467
2468   void releaseBottomNode(SUnit *SU) override {
2469     Bot.releaseBottomNode(SU);
2470   }
2471
2472   void registerRoots() override;
2473
2474 protected:
2475   void checkAcyclicLatency();
2476
2477   void tryCandidate(SchedCandidate &Cand,
2478                     SchedCandidate &TryCand,
2479                     SchedBoundary &Zone,
2480                     const RegPressureTracker &RPTracker,
2481                     RegPressureTracker &TempTracker);
2482
2483   SUnit *pickNodeBidirectional(bool &IsTopNode);
2484
2485   void pickNodeFromQueue(SchedBoundary &Zone,
2486                          const RegPressureTracker &RPTracker,
2487                          SchedCandidate &Candidate);
2488
2489   void reschedulePhysRegCopies(SUnit *SU, bool isTop);
2490 };
2491 } // namespace
2492
2493 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2494   assert(dag->hasVRegLiveness() &&
2495          "(PreRA)GenericScheduler needs vreg liveness");
2496   DAG = static_cast<ScheduleDAGMILive*>(dag);
2497   SchedModel = DAG->getSchedModel();
2498   TRI = DAG->TRI;
2499
2500   Rem.init(DAG, SchedModel);
2501   Top.init(DAG, SchedModel, &Rem);
2502   Bot.init(DAG, SchedModel, &Rem);
2503
2504   // Initialize resource counts.
2505
2506   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2507   // are disabled, then these HazardRecs will be disabled.
2508   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2509   const TargetMachine &TM = DAG->MF.getTarget();
2510   if (!Top.HazardRec) {
2511     Top.HazardRec =
2512       TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
2513   }
2514   if (!Bot.HazardRec) {
2515     Bot.HazardRec =
2516       TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
2517   }
2518 }
2519
2520 /// Initialize the per-region scheduling policy.
2521 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2522                                   MachineBasicBlock::iterator End,
2523                                   unsigned NumRegionInstrs) {
2524   const TargetMachine &TM = Context->MF->getTarget();
2525   const TargetLowering *TLI = TM.getTargetLowering();
2526
2527   // Avoid setting up the register pressure tracker for small regions to save
2528   // compile time. As a rough heuristic, only track pressure when the number of
2529   // schedulable instructions exceeds half the integer register file.
2530   RegionPolicy.ShouldTrackPressure = true;
2531   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2532     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2533     if (TLI->isTypeLegal(LegalIntVT)) {
2534       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2535         TLI->getRegClassFor(LegalIntVT));
2536       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2537     }
2538   }
2539
2540   // For generic targets, we default to bottom-up, because it's simpler and more
2541   // compile-time optimizations have been implemented in that direction.
2542   RegionPolicy.OnlyBottomUp = true;
2543
2544   // Allow the subtarget to override default policy.
2545   const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
2546   ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
2547
2548   // After subtarget overrides, apply command line options.
2549   if (!EnableRegPressure)
2550     RegionPolicy.ShouldTrackPressure = false;
2551
2552   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2553   // e.g. -misched-bottomup=false allows scheduling in both directions.
2554   assert((!ForceTopDown || !ForceBottomUp) &&
2555          "-misched-topdown incompatible with -misched-bottomup");
2556   if (ForceBottomUp.getNumOccurrences() > 0) {
2557     RegionPolicy.OnlyBottomUp = ForceBottomUp;
2558     if (RegionPolicy.OnlyBottomUp)
2559       RegionPolicy.OnlyTopDown = false;
2560   }
2561   if (ForceTopDown.getNumOccurrences() > 0) {
2562     RegionPolicy.OnlyTopDown = ForceTopDown;
2563     if (RegionPolicy.OnlyTopDown)
2564       RegionPolicy.OnlyBottomUp = false;
2565   }
2566 }
2567
2568 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2569 /// critical path by more cycles than it takes to drain the instruction buffer.
2570 /// We estimate an upper bounds on in-flight instructions as:
2571 ///
2572 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2573 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2574 /// InFlightResources = InFlightIterations * LoopResources
2575 ///
2576 /// TODO: Check execution resources in addition to IssueCount.
2577 void GenericScheduler::checkAcyclicLatency() {
2578   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2579     return;
2580
2581   // Scaled number of cycles per loop iteration.
2582   unsigned IterCount =
2583     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2584              Rem.RemIssueCount);
2585   // Scaled acyclic critical path.
2586   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2587   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2588   unsigned InFlightCount =
2589     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2590   unsigned BufferLimit =
2591     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2592
2593   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2594
2595   DEBUG(dbgs() << "IssueCycles="
2596         << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2597         << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2598         << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2599         << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2600         << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2601         if (Rem.IsAcyclicLatencyLimited)
2602           dbgs() << "  ACYCLIC LATENCY LIMIT\n");
2603 }
2604
2605 void GenericScheduler::registerRoots() {
2606   Rem.CriticalPath = DAG->ExitSU.getDepth();
2607
2608   // Some roots may not feed into ExitSU. Check all of them in case.
2609   for (std::vector<SUnit*>::const_iterator
2610          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
2611     if ((*I)->getDepth() > Rem.CriticalPath)
2612       Rem.CriticalPath = (*I)->getDepth();
2613   }
2614   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
2615
2616   if (EnableCyclicPath) {
2617     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2618     checkAcyclicLatency();
2619   }
2620 }
2621
2622 static bool tryPressure(const PressureChange &TryP,
2623                         const PressureChange &CandP,
2624                         GenericSchedulerBase::SchedCandidate &TryCand,
2625                         GenericSchedulerBase::SchedCandidate &Cand,
2626                         GenericSchedulerBase::CandReason Reason) {
2627   int TryRank = TryP.getPSetOrMax();
2628   int CandRank = CandP.getPSetOrMax();
2629   // If both candidates affect the same set, go with the smallest increase.
2630   if (TryRank == CandRank) {
2631     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2632                    Reason);
2633   }
2634   // If one candidate decreases and the other increases, go with it.
2635   // Invalid candidates have UnitInc==0.
2636   if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2637               Reason)) {
2638     return true;
2639   }
2640   // If the candidates are decreasing pressure, reverse priority.
2641   if (TryP.getUnitInc() < 0)
2642     std::swap(TryRank, CandRank);
2643   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2644 }
2645
2646 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2647   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2648 }
2649
2650 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2651 /// their physreg def/use.
2652 ///
2653 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2654 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2655 /// with the operation that produces or consumes the physreg. We'll do this when
2656 /// regalloc has support for parallel copies.
2657 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2658   const MachineInstr *MI = SU->getInstr();
2659   if (!MI->isCopy())
2660     return 0;
2661
2662   unsigned ScheduledOper = isTop ? 1 : 0;
2663   unsigned UnscheduledOper = isTop ? 0 : 1;
2664   // If we have already scheduled the physreg produce/consumer, immediately
2665   // schedule the copy.
2666   if (TargetRegisterInfo::isPhysicalRegister(
2667         MI->getOperand(ScheduledOper).getReg()))
2668     return 1;
2669   // If the physreg is at the boundary, defer it. Otherwise schedule it
2670   // immediately to free the dependent. We can hoist the copy later.
2671   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2672   if (TargetRegisterInfo::isPhysicalRegister(
2673         MI->getOperand(UnscheduledOper).getReg()))
2674     return AtBoundary ? -1 : 1;
2675   return 0;
2676 }
2677
2678 /// Apply a set of heursitics to a new candidate. Heuristics are currently
2679 /// hierarchical. This may be more efficient than a graduated cost model because
2680 /// we don't need to evaluate all aspects of the model for each node in the
2681 /// queue. But it's really done to make the heuristics easier to debug and
2682 /// statistically analyze.
2683 ///
2684 /// \param Cand provides the policy and current best candidate.
2685 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2686 /// \param Zone describes the scheduled zone that we are extending.
2687 /// \param RPTracker describes reg pressure within the scheduled zone.
2688 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
2689 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2690                                     SchedCandidate &TryCand,
2691                                     SchedBoundary &Zone,
2692                                     const RegPressureTracker &RPTracker,
2693                                     RegPressureTracker &TempTracker) {
2694
2695   if (DAG->isTrackingPressure()) {
2696     // Always initialize TryCand's RPDelta.
2697     if (Zone.isTop()) {
2698       TempTracker.getMaxDownwardPressureDelta(
2699         TryCand.SU->getInstr(),
2700         TryCand.RPDelta,
2701         DAG->getRegionCriticalPSets(),
2702         DAG->getRegPressure().MaxSetPressure);
2703     }
2704     else {
2705       if (VerifyScheduling) {
2706         TempTracker.getMaxUpwardPressureDelta(
2707           TryCand.SU->getInstr(),
2708           &DAG->getPressureDiff(TryCand.SU),
2709           TryCand.RPDelta,
2710           DAG->getRegionCriticalPSets(),
2711           DAG->getRegPressure().MaxSetPressure);
2712       }
2713       else {
2714         RPTracker.getUpwardPressureDelta(
2715           TryCand.SU->getInstr(),
2716           DAG->getPressureDiff(TryCand.SU),
2717           TryCand.RPDelta,
2718           DAG->getRegionCriticalPSets(),
2719           DAG->getRegPressure().MaxSetPressure);
2720       }
2721     }
2722   }
2723   DEBUG(if (TryCand.RPDelta.Excess.isValid())
2724           dbgs() << "  SU(" << TryCand.SU->NodeNum << ") "
2725                  << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
2726                  << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
2727
2728   // Initialize the candidate if needed.
2729   if (!Cand.isValid()) {
2730     TryCand.Reason = NodeOrder;
2731     return;
2732   }
2733
2734   if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
2735                  biasPhysRegCopy(Cand.SU, Zone.isTop()),
2736                  TryCand, Cand, PhysRegCopy))
2737     return;
2738
2739   // Avoid exceeding the target's limit. If signed PSetID is negative, it is
2740   // invalid; convert it to INT_MAX to give it lowest priority.
2741   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2742                                                Cand.RPDelta.Excess,
2743                                                TryCand, Cand, RegExcess))
2744     return;
2745
2746   // Avoid increasing the max critical pressure in the scheduled region.
2747   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2748                                                Cand.RPDelta.CriticalMax,
2749                                                TryCand, Cand, RegCritical))
2750     return;
2751
2752   // For loops that are acyclic path limited, aggressively schedule for latency.
2753   // This can result in very long dependence chains scheduled in sequence, so
2754   // once every cycle (when CurrMOps == 0), switch to normal heuristics.
2755   if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
2756       && tryLatency(TryCand, Cand, Zone))
2757     return;
2758
2759   // Prioritize instructions that read unbuffered resources by stall cycles.
2760   if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
2761               Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2762     return;
2763
2764   // Keep clustered nodes together to encourage downstream peephole
2765   // optimizations which may reduce resource requirements.
2766   //
2767   // This is a best effort to set things up for a post-RA pass. Optimizations
2768   // like generating loads of multiple registers should ideally be done within
2769   // the scheduler pass by combining the loads during DAG postprocessing.
2770   const SUnit *NextClusterSU =
2771     Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2772   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
2773                  TryCand, Cand, Cluster))
2774     return;
2775
2776   // Weak edges are for clustering and other constraints.
2777   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
2778               getWeakLeft(Cand.SU, Zone.isTop()),
2779               TryCand, Cand, Weak)) {
2780     return;
2781   }
2782   // Avoid increasing the max pressure of the entire region.
2783   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2784                                                Cand.RPDelta.CurrentMax,
2785                                                TryCand, Cand, RegMax))
2786     return;
2787
2788   // Avoid critical resource consumption and balance the schedule.
2789   TryCand.initResourceDelta(DAG, SchedModel);
2790   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2791               TryCand, Cand, ResourceReduce))
2792     return;
2793   if (tryGreater(TryCand.ResDelta.DemandedResources,
2794                  Cand.ResDelta.DemandedResources,
2795                  TryCand, Cand, ResourceDemand))
2796     return;
2797
2798   // Avoid serializing long latency dependence chains.
2799   // For acyclic path limited loops, latency was already checked above.
2800   if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
2801       && tryLatency(TryCand, Cand, Zone)) {
2802     return;
2803   }
2804
2805   // Prefer immediate defs/users of the last scheduled instruction. This is a
2806   // local pressure avoidance strategy that also makes the machine code
2807   // readable.
2808   if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
2809                  TryCand, Cand, NextDefUse))
2810     return;
2811
2812   // Fall through to original instruction order.
2813   if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2814       || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2815     TryCand.Reason = NodeOrder;
2816   }
2817 }
2818
2819 /// Pick the best candidate from the queue.
2820 ///
2821 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2822 /// DAG building. To adjust for the current scheduling location we need to
2823 /// maintain the number of vreg uses remaining to be top-scheduled.
2824 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2825                                          const RegPressureTracker &RPTracker,
2826                                          SchedCandidate &Cand) {
2827   ReadyQueue &Q = Zone.Available;
2828
2829   DEBUG(Q.dump());
2830
2831   // getMaxPressureDelta temporarily modifies the tracker.
2832   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2833
2834   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2835
2836     SchedCandidate TryCand(Cand.Policy);
2837     TryCand.SU = *I;
2838     tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
2839     if (TryCand.Reason != NoCand) {
2840       // Initialize resource delta if needed in case future heuristics query it.
2841       if (TryCand.ResDelta == SchedResourceDelta())
2842         TryCand.initResourceDelta(DAG, SchedModel);
2843       Cand.setBest(TryCand);
2844       DEBUG(traceCandidate(Cand));
2845     }
2846   }
2847 }
2848
2849 /// Pick the best candidate node from either the top or bottom queue.
2850 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
2851   // Schedule as far as possible in the direction of no choice. This is most
2852   // efficient, but also provides the best heuristics for CriticalPSets.
2853   if (SUnit *SU = Bot.pickOnlyChoice()) {
2854     IsTopNode = false;
2855     DEBUG(dbgs() << "Pick Bot NOCAND\n");
2856     return SU;
2857   }
2858   if (SUnit *SU = Top.pickOnlyChoice()) {
2859     IsTopNode = true;
2860     DEBUG(dbgs() << "Pick Top NOCAND\n");
2861     return SU;
2862   }
2863   CandPolicy NoPolicy;
2864   SchedCandidate BotCand(NoPolicy);
2865   SchedCandidate TopCand(NoPolicy);
2866   // Set the bottom-up policy based on the state of the current bottom zone and
2867   // the instructions outside the zone, including the top zone.
2868   setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
2869   // Set the top-down policy based on the state of the current top zone and
2870   // the instructions outside the zone, including the bottom zone.
2871   setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
2872
2873   // Prefer bottom scheduling when heuristics are silent.
2874   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2875   assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2876
2877   // If either Q has a single candidate that provides the least increase in
2878   // Excess pressure, we can immediately schedule from that Q.
2879   //
2880   // RegionCriticalPSets summarizes the pressure within the scheduled region and
2881   // affects picking from either Q. If scheduling in one direction must
2882   // increase pressure for one of the excess PSets, then schedule in that
2883   // direction first to provide more freedom in the other direction.
2884   if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
2885       || (BotCand.Reason == RegCritical
2886           && !BotCand.isRepeat(RegCritical)))
2887   {
2888     IsTopNode = false;
2889     tracePick(BotCand, IsTopNode);
2890     return BotCand.SU;
2891   }
2892   // Check if the top Q has a better candidate.
2893   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2894   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2895
2896   // Choose the queue with the most important (lowest enum) reason.
2897   if (TopCand.Reason < BotCand.Reason) {
2898     IsTopNode = true;
2899     tracePick(TopCand, IsTopNode);
2900     return TopCand.SU;
2901   }
2902   // Otherwise prefer the bottom candidate, in node order if all else failed.
2903   IsTopNode = false;
2904   tracePick(BotCand, IsTopNode);
2905   return BotCand.SU;
2906 }
2907
2908 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
2909 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
2910   if (DAG->top() == DAG->bottom()) {
2911     assert(Top.Available.empty() && Top.Pending.empty() &&
2912            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
2913     return NULL;
2914   }
2915   SUnit *SU;
2916   do {
2917     if (RegionPolicy.OnlyTopDown) {
2918       SU = Top.pickOnlyChoice();
2919       if (!SU) {
2920         CandPolicy NoPolicy;
2921         SchedCandidate TopCand(NoPolicy);
2922         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2923         assert(TopCand.Reason != NoCand && "failed to find a candidate");
2924         tracePick(TopCand, true);
2925         SU = TopCand.SU;
2926       }
2927       IsTopNode = true;
2928     }
2929     else if (RegionPolicy.OnlyBottomUp) {
2930       SU = Bot.pickOnlyChoice();
2931       if (!SU) {
2932         CandPolicy NoPolicy;
2933         SchedCandidate BotCand(NoPolicy);
2934         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2935         assert(BotCand.Reason != NoCand && "failed to find a candidate");
2936         tracePick(BotCand, false);
2937         SU = BotCand.SU;
2938       }
2939       IsTopNode = false;
2940     }
2941     else {
2942       SU = pickNodeBidirectional(IsTopNode);
2943     }
2944   } while (SU->isScheduled);
2945
2946   if (SU->isTopReady())
2947     Top.removeReady(SU);
2948   if (SU->isBottomReady())
2949     Bot.removeReady(SU);
2950
2951   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
2952   return SU;
2953 }
2954
2955 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
2956
2957   MachineBasicBlock::iterator InsertPos = SU->getInstr();
2958   if (!isTop)
2959     ++InsertPos;
2960   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
2961
2962   // Find already scheduled copies with a single physreg dependence and move
2963   // them just above the scheduled instruction.
2964   for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
2965        I != E; ++I) {
2966     if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
2967       continue;
2968     SUnit *DepSU = I->getSUnit();
2969     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
2970       continue;
2971     MachineInstr *Copy = DepSU->getInstr();
2972     if (!Copy->isCopy())
2973       continue;
2974     DEBUG(dbgs() << "  Rescheduling physreg copy ";
2975           I->getSUnit()->dump(DAG));
2976     DAG->moveInstruction(Copy, InsertPos);
2977   }
2978 }
2979
2980 /// Update the scheduler's state after scheduling a node. This is the same node
2981 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
2982 /// update it's state based on the current cycle before MachineSchedStrategy
2983 /// does.
2984 ///
2985 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
2986 /// them here. See comments in biasPhysRegCopy.
2987 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2988   if (IsTopNode) {
2989     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
2990     Top.bumpNode(SU);
2991     if (SU->hasPhysRegUses)
2992       reschedulePhysRegCopies(SU, true);
2993   }
2994   else {
2995     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
2996     Bot.bumpNode(SU);
2997     if (SU->hasPhysRegDefs)
2998       reschedulePhysRegCopies(SU, false);
2999   }
3000 }
3001
3002 /// Create the standard converging machine scheduler. This will be used as the
3003 /// default scheduler if the target does not set a default.
3004 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
3005   ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, new GenericScheduler(C));
3006   // Register DAG post-processors.
3007   //
3008   // FIXME: extend the mutation API to allow earlier mutations to instantiate
3009   // data and pass it to later mutations. Have a single mutation that gathers
3010   // the interesting nodes in one pass.
3011   DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
3012   if (EnableLoadCluster && DAG->TII->enableClusterLoads())
3013     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
3014   if (EnableMacroFusion)
3015     DAG->addMutation(new MacroFusion(DAG->TII));
3016   return DAG;
3017 }
3018
3019 static MachineSchedRegistry
3020 GenericSchedRegistry("converge", "Standard converging scheduler.",
3021                      createGenericSchedLive);
3022
3023 //===----------------------------------------------------------------------===//
3024 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3025 //===----------------------------------------------------------------------===//
3026
3027 namespace {
3028 /// PostGenericScheduler - Interface to the scheduling algorithm used by
3029 /// ScheduleDAGMI.
3030 ///
3031 /// Callbacks from ScheduleDAGMI:
3032 ///   initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
3033 class PostGenericScheduler : public GenericSchedulerBase {
3034   ScheduleDAGMI *DAG;
3035   SchedBoundary Top;
3036   SmallVector<SUnit*, 8> BotRoots;
3037 public:
3038   PostGenericScheduler(const MachineSchedContext *C):
3039     GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
3040
3041   virtual ~PostGenericScheduler() {}
3042
3043   void initPolicy(MachineBasicBlock::iterator Begin,
3044                   MachineBasicBlock::iterator End,
3045                   unsigned NumRegionInstrs) override {
3046     /* no configurable policy */
3047   };
3048
3049   /// PostRA scheduling does not track pressure.
3050   bool shouldTrackPressure() const override { return false; }
3051
3052   void initialize(ScheduleDAGMI *Dag) override {
3053     DAG = Dag;
3054     SchedModel = DAG->getSchedModel();
3055     TRI = DAG->TRI;
3056
3057     Rem.init(DAG, SchedModel);
3058     Top.init(DAG, SchedModel, &Rem);
3059     BotRoots.clear();
3060
3061     // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3062     // or are disabled, then these HazardRecs will be disabled.
3063     const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3064     const TargetMachine &TM = DAG->MF.getTarget();
3065     if (!Top.HazardRec) {
3066       Top.HazardRec =
3067         TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
3068     }
3069   }
3070
3071   void registerRoots() override;
3072
3073   SUnit *pickNode(bool &IsTopNode) override;
3074
3075   void scheduleTree(unsigned SubtreeID) override {
3076     llvm_unreachable("PostRA scheduler does not support subtree analysis.");
3077   }
3078
3079   void schedNode(SUnit *SU, bool IsTopNode) override;
3080
3081   void releaseTopNode(SUnit *SU) override {
3082     Top.releaseTopNode(SU);
3083   }
3084
3085   // Only called for roots.
3086   void releaseBottomNode(SUnit *SU) override {
3087     BotRoots.push_back(SU);
3088   }
3089
3090 protected:
3091   void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
3092
3093   void pickNodeFromQueue(SchedCandidate &Cand);
3094 };
3095 } // namespace
3096
3097 void PostGenericScheduler::registerRoots() {
3098   Rem.CriticalPath = DAG->ExitSU.getDepth();
3099
3100   // Some roots may not feed into ExitSU. Check all of them in case.
3101   for (SmallVectorImpl<SUnit*>::const_iterator
3102          I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
3103     if ((*I)->getDepth() > Rem.CriticalPath)
3104       Rem.CriticalPath = (*I)->getDepth();
3105   }
3106   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
3107 }
3108
3109 /// Apply a set of heursitics to a new candidate for PostRA scheduling.
3110 ///
3111 /// \param Cand provides the policy and current best candidate.
3112 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3113 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3114                                         SchedCandidate &TryCand) {
3115
3116   // Initialize the candidate if needed.
3117   if (!Cand.isValid()) {
3118     TryCand.Reason = NodeOrder;
3119     return;
3120   }
3121
3122   // Prioritize instructions that read unbuffered resources by stall cycles.
3123   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3124               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3125     return;
3126
3127   // Avoid critical resource consumption and balance the schedule.
3128   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3129               TryCand, Cand, ResourceReduce))
3130     return;
3131   if (tryGreater(TryCand.ResDelta.DemandedResources,
3132                  Cand.ResDelta.DemandedResources,
3133                  TryCand, Cand, ResourceDemand))
3134     return;
3135
3136   // Avoid serializing long latency dependence chains.
3137   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3138     return;
3139   }
3140
3141   // Fall through to original instruction order.
3142   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3143     TryCand.Reason = NodeOrder;
3144 }
3145
3146 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3147   ReadyQueue &Q = Top.Available;
3148
3149   DEBUG(Q.dump());
3150
3151   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
3152     SchedCandidate TryCand(Cand.Policy);
3153     TryCand.SU = *I;
3154     TryCand.initResourceDelta(DAG, SchedModel);
3155     tryCandidate(Cand, TryCand);
3156     if (TryCand.Reason != NoCand) {
3157       Cand.setBest(TryCand);
3158       DEBUG(traceCandidate(Cand));
3159     }
3160   }
3161 }
3162
3163 /// Pick the next node to schedule.
3164 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3165   if (DAG->top() == DAG->bottom()) {
3166     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3167     return NULL;
3168   }
3169   SUnit *SU;
3170   do {
3171     SU = Top.pickOnlyChoice();
3172     if (!SU) {
3173       CandPolicy NoPolicy;
3174       SchedCandidate TopCand(NoPolicy);
3175       // Set the top-down policy based on the state of the current top zone and
3176       // the instructions outside the zone, including the bottom zone.
3177       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, NULL);
3178       pickNodeFromQueue(TopCand);
3179       assert(TopCand.Reason != NoCand && "failed to find a candidate");
3180       tracePick(TopCand, true);
3181       SU = TopCand.SU;
3182     }
3183   } while (SU->isScheduled);
3184
3185   IsTopNode = true;
3186   Top.removeReady(SU);
3187
3188   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3189   return SU;
3190 }
3191
3192 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3193 /// scheduled/remaining flags in the DAG nodes.
3194 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3195   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3196   Top.bumpNode(SU);
3197 }
3198
3199 /// Create a generic scheduler with no vreg liveness or DAG mutation passes.
3200 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
3201   return new ScheduleDAGMI(C, new PostGenericScheduler(C), /*IsPostRA=*/true);
3202 }
3203
3204 //===----------------------------------------------------------------------===//
3205 // ILP Scheduler. Currently for experimental analysis of heuristics.
3206 //===----------------------------------------------------------------------===//
3207
3208 namespace {
3209 /// \brief Order nodes by the ILP metric.
3210 struct ILPOrder {
3211   const SchedDFSResult *DFSResult;
3212   const BitVector *ScheduledTrees;
3213   bool MaximizeILP;
3214
3215   ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
3216
3217   /// \brief Apply a less-than relation on node priority.
3218   ///
3219   /// (Return true if A comes after B in the Q.)
3220   bool operator()(const SUnit *A, const SUnit *B) const {
3221     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3222     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3223     if (SchedTreeA != SchedTreeB) {
3224       // Unscheduled trees have lower priority.
3225       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3226         return ScheduledTrees->test(SchedTreeB);
3227
3228       // Trees with shallower connections have have lower priority.
3229       if (DFSResult->getSubtreeLevel(SchedTreeA)
3230           != DFSResult->getSubtreeLevel(SchedTreeB)) {
3231         return DFSResult->getSubtreeLevel(SchedTreeA)
3232           < DFSResult->getSubtreeLevel(SchedTreeB);
3233       }
3234     }
3235     if (MaximizeILP)
3236       return DFSResult->getILP(A) < DFSResult->getILP(B);
3237     else
3238       return DFSResult->getILP(A) > DFSResult->getILP(B);
3239   }
3240 };
3241
3242 /// \brief Schedule based on the ILP metric.
3243 class ILPScheduler : public MachineSchedStrategy {
3244   ScheduleDAGMILive *DAG;
3245   ILPOrder Cmp;
3246
3247   std::vector<SUnit*> ReadyQ;
3248 public:
3249   ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
3250
3251   void initialize(ScheduleDAGMI *dag) override {
3252     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3253     DAG = static_cast<ScheduleDAGMILive*>(dag);
3254     DAG->computeDFSResult();
3255     Cmp.DFSResult = DAG->getDFSResult();
3256     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3257     ReadyQ.clear();
3258   }
3259
3260   void registerRoots() override {
3261     // Restore the heap in ReadyQ with the updated DFS results.
3262     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3263   }
3264
3265   /// Implement MachineSchedStrategy interface.
3266   /// -----------------------------------------
3267
3268   /// Callback to select the highest priority node from the ready Q.
3269   SUnit *pickNode(bool &IsTopNode) override {
3270     if (ReadyQ.empty()) return NULL;
3271     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3272     SUnit *SU = ReadyQ.back();
3273     ReadyQ.pop_back();
3274     IsTopNode = false;
3275     DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
3276           << " ILP: " << DAG->getDFSResult()->getILP(SU)
3277           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
3278           << DAG->getDFSResult()->getSubtreeLevel(
3279             DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
3280           << "Scheduling " << *SU->getInstr());
3281     return SU;
3282   }
3283
3284   /// \brief Scheduler callback to notify that a new subtree is scheduled.
3285   void scheduleTree(unsigned SubtreeID) override {
3286     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3287   }
3288
3289   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3290   /// DFSResults, and resort the priority Q.
3291   void schedNode(SUnit *SU, bool IsTopNode) override {
3292     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3293   }
3294
3295   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3296
3297   void releaseBottomNode(SUnit *SU) override {
3298     ReadyQ.push_back(SU);
3299     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3300   }
3301 };
3302 } // namespace
3303
3304 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3305   return new ScheduleDAGMILive(C, new ILPScheduler(true));
3306 }
3307 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3308   return new ScheduleDAGMILive(C, new ILPScheduler(false));
3309 }
3310 static MachineSchedRegistry ILPMaxRegistry(
3311   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3312 static MachineSchedRegistry ILPMinRegistry(
3313   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3314
3315 //===----------------------------------------------------------------------===//
3316 // Machine Instruction Shuffler for Correctness Testing
3317 //===----------------------------------------------------------------------===//
3318
3319 #ifndef NDEBUG
3320 namespace {
3321 /// Apply a less-than relation on the node order, which corresponds to the
3322 /// instruction order prior to scheduling. IsReverse implements greater-than.
3323 template<bool IsReverse>
3324 struct SUnitOrder {
3325   bool operator()(SUnit *A, SUnit *B) const {
3326     if (IsReverse)
3327       return A->NodeNum > B->NodeNum;
3328     else
3329       return A->NodeNum < B->NodeNum;
3330   }
3331 };
3332
3333 /// Reorder instructions as much as possible.
3334 class InstructionShuffler : public MachineSchedStrategy {
3335   bool IsAlternating;
3336   bool IsTopDown;
3337
3338   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3339   // gives nodes with a higher number higher priority causing the latest
3340   // instructions to be scheduled first.
3341   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
3342     TopQ;
3343   // When scheduling bottom-up, use greater-than as the queue priority.
3344   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
3345     BottomQ;
3346 public:
3347   InstructionShuffler(bool alternate, bool topdown)
3348     : IsAlternating(alternate), IsTopDown(topdown) {}
3349
3350   virtual void initialize(ScheduleDAGMI*) {
3351     TopQ.clear();
3352     BottomQ.clear();
3353   }
3354
3355   /// Implement MachineSchedStrategy interface.
3356   /// -----------------------------------------
3357
3358   virtual SUnit *pickNode(bool &IsTopNode) {
3359     SUnit *SU;
3360     if (IsTopDown) {
3361       do {
3362         if (TopQ.empty()) return NULL;
3363         SU = TopQ.top();
3364         TopQ.pop();
3365       } while (SU->isScheduled);
3366       IsTopNode = true;
3367     }
3368     else {
3369       do {
3370         if (BottomQ.empty()) return NULL;
3371         SU = BottomQ.top();
3372         BottomQ.pop();
3373       } while (SU->isScheduled);
3374       IsTopNode = false;
3375     }
3376     if (IsAlternating)
3377       IsTopDown = !IsTopDown;
3378     return SU;
3379   }
3380
3381   virtual void schedNode(SUnit *SU, bool IsTopNode) {}
3382
3383   virtual void releaseTopNode(SUnit *SU) {
3384     TopQ.push(SU);
3385   }
3386   virtual void releaseBottomNode(SUnit *SU) {
3387     BottomQ.push(SU);
3388   }
3389 };
3390 } // namespace
3391
3392 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3393   bool Alternate = !ForceTopDown && !ForceBottomUp;
3394   bool TopDown = !ForceBottomUp;
3395   assert((TopDown || !ForceTopDown) &&
3396          "-misched-topdown incompatible with -misched-bottomup");
3397   return new ScheduleDAGMILive(C, new InstructionShuffler(Alternate, TopDown));
3398 }
3399 static MachineSchedRegistry ShufflerRegistry(
3400   "shuffle", "Shuffle machine instructions alternating directions",
3401   createInstructionShuffler);
3402 #endif // !NDEBUG
3403
3404 //===----------------------------------------------------------------------===//
3405 // GraphWriter support for ScheduleDAGMILive.
3406 //===----------------------------------------------------------------------===//
3407
3408 #ifndef NDEBUG
3409 namespace llvm {
3410
3411 template<> struct GraphTraits<
3412   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3413
3414 template<>
3415 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3416
3417   DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
3418
3419   static std::string getGraphName(const ScheduleDAG *G) {
3420     return G->MF.getName();
3421   }
3422
3423   static bool renderGraphFromBottomUp() {
3424     return true;
3425   }
3426
3427   static bool isNodeHidden(const SUnit *Node) {
3428     return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
3429   }
3430
3431   static bool hasNodeAddressLabel(const SUnit *Node,
3432                                   const ScheduleDAG *Graph) {
3433     return false;
3434   }
3435
3436   /// If you want to override the dot attributes printed for a particular
3437   /// edge, override this method.
3438   static std::string getEdgeAttributes(const SUnit *Node,
3439                                        SUnitIterator EI,
3440                                        const ScheduleDAG *Graph) {
3441     if (EI.isArtificialDep())
3442       return "color=cyan,style=dashed";
3443     if (EI.isCtrlDep())
3444       return "color=blue,style=dashed";
3445     return "";
3446   }
3447
3448   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3449     std::string Str;
3450     raw_string_ostream SS(Str);
3451     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3452     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3453       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0;
3454     SS << "SU:" << SU->NodeNum;
3455     if (DFS)
3456       SS << " I:" << DFS->getNumInstrs(SU);
3457     return SS.str();
3458   }
3459   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3460     return G->getGraphNodeLabel(SU);
3461   }
3462
3463   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3464     std::string Str("shape=Mrecord");
3465     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3466     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3467       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0;
3468     if (DFS) {
3469       Str += ",style=filled,fillcolor=\"#";
3470       Str += DOT::getColorString(DFS->getSubtreeID(N));
3471       Str += '"';
3472     }
3473     return Str;
3474   }
3475 };
3476 } // namespace llvm
3477 #endif // NDEBUG
3478
3479 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3480 /// rendered using 'dot'.
3481 ///
3482 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3483 #ifndef NDEBUG
3484   ViewGraph(this, Name, false, Title);
3485 #else
3486   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3487          << "systems with Graphviz or gv!\n";
3488 #endif  // NDEBUG
3489 }
3490
3491 /// Out-of-line implementation with no arguments is handy for gdb.
3492 void ScheduleDAGMI::viewGraph() {
3493   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3494 }