lib/CodeGen/InstrSched/InstrScheduling.cpp

   1 // $Id$
   2 //***************************************************************************
   3 // File:
   4 //      InstrScheduling.cpp
   5 //
   6 // Purpose:
   7 //
   8 // History:
   9 //      7/23/01  -  Vikram Adve  -  Created
  10 //**************************************************************************/
  11
  12
  13 #include "llvm/CodeGen/InstrScheduling.h"
  14 #include "llvm/CodeGen/MachineInstr.h"
  15 #include "llvm/CodeGen/MachineCodeForInstruction.h"
  16 #include "llvm/CodeGen/MachineCodeForMethod.h"
  17 #include "llvm/Analysis/LiveVar/MethodLiveVarInfo.h" // FIXME: Remove when AnalysisUsage sets can be symbolic!
  18 #include "llvm/Target/TargetMachine.h"
  19 #include "llvm/BasicBlock.h"
  20 #include "llvm/Instruction.h"
  21 #include "SchedPriorities.h"
  22 #include <ext/hash_set>
  23 #include <algorithm>
  24 #include <iterator>
  25 #include <iostream>
  26 using std::cerr;
  27 using std::vector;
  28
  29 //************************* External Data Types *****************************/
  30
  31 cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags,
  32   "enable instruction scheduling debugging information",
  33   clEnumValN(Sched_NoDebugInfo,      "n", "disable debug output"),
  34   clEnumValN(Sched_Disable,        "off", "disable instruction scheduling"),
  35   clEnumValN(Sched_PrintMachineCode, "y", "print machine code after scheduling"),
  36   clEnumValN(Sched_PrintSchedTrace,  "t", "print trace of scheduling actions"),
  37   clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 0);
  38
  39
  40 //************************* Internal Data Types *****************************/
  41
  42 class InstrSchedule;
  43 class SchedulingManager;
  44
  45
  46 //----------------------------------------------------------------------
  47 // class InstrGroup:
  48 //
  49 // Represents a group of instructions scheduled to be issued
  50 // in a single cycle.
  51 //----------------------------------------------------------------------
  52
  53 class InstrGroup: public NonCopyable {
  54 public:
  55   inline const SchedGraphNode* operator[](unsigned int slotNum) const {
  56     assert(slotNum  < group.size());
  57     return group[slotNum];
  58   }
  59
  60 private:
  61   friend class InstrSchedule;
  62
  63   inline void   addInstr(const SchedGraphNode* node, unsigned int slotNum) {
  64     assert(slotNum < group.size());
  65     group[slotNum] = node;
  66   }
  67
  68   /*ctor*/      InstrGroup(unsigned int nslots)
  69     : group(nslots, NULL) {}
  70
  71   /*ctor*/      InstrGroup();           // disable: DO NOT IMPLEMENT
  72
  73 private:
  74   vector<const SchedGraphNode*> group;
  75 };
  76
  77
  78 //----------------------------------------------------------------------
  79 // class ScheduleIterator:
  80 //
  81 // Iterates over the machine instructions in the for a single basic block.
  82 // The schedule is represented by an InstrSchedule object.
  83 //----------------------------------------------------------------------
  84
  85 template<class _NodeType>
  86 class ScheduleIterator: public std::forward_iterator<_NodeType, ptrdiff_t> {
  87 private:
  88   unsigned cycleNum;
  89   unsigned slotNum;
  90   const InstrSchedule& S;
  91 public:
  92   typedef ScheduleIterator<_NodeType> _Self;
  93
  94   /*ctor*/ inline ScheduleIterator(const InstrSchedule& _schedule,
  95                                    unsigned _cycleNum,
  96                                    unsigned _slotNum)
  97     : cycleNum(_cycleNum), slotNum(_slotNum), S(_schedule) {
  98     skipToNextInstr();
  99   }
 100
 101   /*ctor*/ inline ScheduleIterator(const _Self& x)
 102     : cycleNum(x.cycleNum), slotNum(x.slotNum), S(x.S) {}
 103
 104   inline bool operator==(const _Self& x) const {
 105     return (slotNum == x.slotNum && cycleNum== x.cycleNum && &S==&x.S);
 106   }
 107
 108   inline bool operator!=(const _Self& x) const { return !operator==(x); }
 109
 110   inline _NodeType* operator*() const {
 111     assert(cycleNum < S.groups.size());
 112     return (*S.groups[cycleNum])[slotNum];
 113   }
 114   inline _NodeType* operator->() const { return operator*(); }
 115
 116          _Self& operator++();                           // Preincrement
 117   inline _Self operator++(int) {                        // Postincrement
 118     _Self tmp(*this); ++*this; return tmp;
 119   }
 120
 121   static _Self begin(const InstrSchedule& _schedule);
 122   static _Self end(  const InstrSchedule& _schedule);
 123
 124 private:
 125   inline _Self& operator=(const _Self& x); // DISABLE -- DO NOT IMPLEMENT
 126   void  skipToNextInstr();
 127 };
 128
 129
 130 //----------------------------------------------------------------------
 131 // class InstrSchedule:
 132 //
 133 // Represents the schedule of machine instructions for a single basic block.
 134 //----------------------------------------------------------------------
 135
 136 class InstrSchedule: public NonCopyable {
 137 private:
 138   const unsigned int nslots;
 139   unsigned int numInstr;
 140   vector<InstrGroup*> groups;           // indexed by cycle number
 141   vector<cycles_t> startTime;           // indexed by node id
 142
 143 public: // iterators
 144   typedef ScheduleIterator<SchedGraphNode> iterator;
 145   typedef ScheduleIterator<const SchedGraphNode> const_iterator;
 146
 147         iterator begin();
 148   const_iterator begin() const;
 149         iterator end();
 150   const_iterator end()   const;
 151
 152 public: // constructors and destructor
 153   /*ctor*/              InstrSchedule   (unsigned int _nslots,
 154                                          unsigned int _numNodes);
 155   /*dtor*/              ~InstrSchedule  ();
 156
 157 public: // accessor functions to query chosen schedule
 158   const SchedGraphNode* getInstr        (unsigned int slotNum,
 159                                          cycles_t c) const {
 160     const InstrGroup* igroup = this->getIGroup(c);
 161     return (igroup == NULL)? NULL : (*igroup)[slotNum];
 162   }
 163
 164   inline InstrGroup*    getIGroup       (cycles_t c) {
 165     if ((unsigned)c >= groups.size())
 166       groups.resize(c+1);
 167     if (groups[c] == NULL)
 168       groups[c] = new InstrGroup(nslots);
 169     return groups[c];
 170   }
 171
 172   inline const InstrGroup* getIGroup    (cycles_t c) const {
 173     assert((unsigned)c < groups.size());
 174     return groups[c];
 175   }
 176
 177   inline cycles_t       getStartTime    (unsigned int nodeId) const {
 178     assert(nodeId < startTime.size());
 179     return startTime[nodeId];
 180   }
 181
 182   unsigned int          getNumInstructions() const {
 183     return numInstr;
 184   }
 185
 186   inline void           scheduleInstr   (const SchedGraphNode* node,
 187                                          unsigned int slotNum,
 188                                          cycles_t cycle) {
 189     InstrGroup* igroup = this->getIGroup(cycle);
 190     assert((*igroup)[slotNum] == NULL &&  "Slot already filled?");
 191     igroup->addInstr(node, slotNum);
 192     assert(node->getNodeId() < startTime.size());
 193     startTime[node->getNodeId()] = cycle;
 194     ++numInstr;
 195   }
 196
 197 private:
 198   friend class iterator;
 199   friend class const_iterator;
 200   /*ctor*/      InstrSchedule   ();     // Disable: DO NOT IMPLEMENT.
 201 };
 202
 203
 204 /*ctor*/
 205 InstrSchedule::InstrSchedule(unsigned int _nslots, unsigned int _numNodes)
 206   : nslots(_nslots),
 207     numInstr(0),
 208     groups(2 * _numNodes / _nslots),            // 2 x lower-bound for #cycles
 209     startTime(_numNodes, (cycles_t) -1)         // set all to -1
 210 {
 211 }
 212
 213
 214 /*dtor*/
 215 InstrSchedule::~InstrSchedule()
 216 {
 217   for (unsigned c=0, NC=groups.size(); c < NC; c++)
 218     if (groups[c] != NULL)
 219       delete groups[c];                 // delete InstrGroup objects
 220 }
 221
 222
 223 template<class _NodeType>
 224 inline
 225 void
 226 ScheduleIterator<_NodeType>::skipToNextInstr()
 227 {
 228   while(cycleNum < S.groups.size() && S.groups[cycleNum] == NULL)
 229     ++cycleNum;                 // skip cycles with no instructions
 230
 231   while (cycleNum < S.groups.size() &&
 232          (*S.groups[cycleNum])[slotNum] == NULL)
 233     {
 234       ++slotNum;
 235       if (slotNum == S.nslots)
 236         {
 237           ++cycleNum;
 238           slotNum = 0;
 239           while(cycleNum < S.groups.size() && S.groups[cycleNum] == NULL)
 240             ++cycleNum;                 // skip cycles with no instructions
 241         }
 242     }
 243 }
 244
 245 template<class _NodeType>
 246 inline
 247 ScheduleIterator<_NodeType>&
 248 ScheduleIterator<_NodeType>::operator++()       // Preincrement
 249 {
 250   ++slotNum;
 251   if (slotNum == S.nslots)
 252     {
 253       ++cycleNum;
 254       slotNum = 0;
 255     }
 256   skipToNextInstr();
 257   return *this;
 258 }
 259
 260 template<class _NodeType>
 261 ScheduleIterator<_NodeType>
 262 ScheduleIterator<_NodeType>::begin(const InstrSchedule& _schedule)
 263 {
 264   return _Self(_schedule, 0, 0);
 265 }
 266
 267 template<class _NodeType>
 268 ScheduleIterator<_NodeType>
 269 ScheduleIterator<_NodeType>::end(const InstrSchedule& _schedule)
 270 {
 271   return _Self(_schedule, _schedule.groups.size(), 0);
 272 }
 273
 274 InstrSchedule::iterator
 275 InstrSchedule::begin()
 276 {
 277   return iterator::begin(*this);
 278 }
 279
 280 InstrSchedule::const_iterator
 281 InstrSchedule::begin() const
 282 {
 283   return const_iterator::begin(*this);
 284 }
 285
 286 InstrSchedule::iterator
 287 InstrSchedule::end()
 288 {
 289   return iterator::end(*this);
 290 }
 291
 292 InstrSchedule::const_iterator
 293 InstrSchedule::end() const
 294 {
 295   return const_iterator::end(  *this);
 296 }
 297
 298
 299 //----------------------------------------------------------------------
 300 // class DelaySlotInfo:
 301 //
 302 // Record information about delay slots for a single branch instruction.
 303 // Delay slots are simply indexed by slot number 1 ... numDelaySlots
 304 //----------------------------------------------------------------------
 305
 306 class DelaySlotInfo: public NonCopyable {
 307 private:
 308   const SchedGraphNode* brNode;
 309   unsigned int ndelays;
 310   vector<const SchedGraphNode*> delayNodeVec;
 311   cycles_t delayedNodeCycle;
 312   unsigned int delayedNodeSlotNum;
 313
 314 public:
 315   /*ctor*/      DelaySlotInfo           (const SchedGraphNode* _brNode,
 316                                          unsigned _ndelays)
 317     : brNode(_brNode), ndelays(_ndelays),
 318       delayedNodeCycle(0), delayedNodeSlotNum(0) {}
 319
 320   inline unsigned getNumDelays  () {
 321     return ndelays;
 322   }
 323
 324   inline const vector<const SchedGraphNode*>& getDelayNodeVec() {
 325     return delayNodeVec;
 326   }
 327
 328   inline void   addDelayNode            (const SchedGraphNode* node) {
 329     delayNodeVec.push_back(node);
 330     assert(delayNodeVec.size() <= ndelays && "Too many delay slot instrs!");
 331   }
 332
 333   inline void   recordChosenSlot        (cycles_t cycle, unsigned slotNum) {
 334     delayedNodeCycle = cycle;
 335     delayedNodeSlotNum = slotNum;
 336   }
 337
 338   unsigned      scheduleDelayedNode     (SchedulingManager& S);
 339 };
 340
 341
 342 //----------------------------------------------------------------------
 343 // class SchedulingManager:
 344 //
 345 // Represents the schedule of machine instructions for a single basic block.
 346 //----------------------------------------------------------------------
 347
 348 class SchedulingManager: public NonCopyable {
 349 public: // publicly accessible data members
 350   const unsigned int nslots;
 351   const MachineSchedInfo& schedInfo;
 352   SchedPriorities& schedPrio;
 353   InstrSchedule isched;
 354
 355 private:
 356   unsigned int totalInstrCount;
 357   cycles_t curTime;
 358   cycles_t nextEarliestIssueTime;               // next cycle we can issue
 359   vector<std::hash_set<const SchedGraphNode*> > choicesForSlot; // indexed by slot#
 360   vector<const SchedGraphNode*> choiceVec;      // indexed by node ptr
 361   vector<int> numInClass;                       // indexed by sched class
 362   vector<cycles_t> nextEarliestStartTime;       // indexed by opCode
 363   std::hash_map<const SchedGraphNode*, DelaySlotInfo*> delaySlotInfoForBranches;
 364                                                 // indexed by branch node ptr
 365
 366 public:
 367   /*ctor*/      SchedulingManager       (const TargetMachine& _target,
 368                                          const SchedGraph* graph,
 369                                          SchedPriorities& schedPrio);
 370   /*dtor*/      ~SchedulingManager      () {}
 371
 372   //----------------------------------------------------------------------
 373   // Simplify access to the machine instruction info
 374   //----------------------------------------------------------------------
 375
 376   inline const MachineInstrInfo& getInstrInfo   () const {
 377     return schedInfo.getInstrInfo();
 378   }
 379
 380   //----------------------------------------------------------------------
 381   // Interface for checking and updating the current time
 382   //----------------------------------------------------------------------
 383
 384   inline cycles_t       getTime                 () const {
 385     return curTime;
 386   }
 387
 388   inline cycles_t       getEarliestIssueTime() const {
 389     return nextEarliestIssueTime;
 390   }
 391
 392   inline cycles_t       getEarliestStartTimeForOp(MachineOpCode opCode) const {
 393     assert(opCode < (int) nextEarliestStartTime.size());
 394     return nextEarliestStartTime[opCode];
 395   }
 396
 397   // Update current time to specified cycle
 398   inline void   updateTime              (cycles_t c) {
 399     curTime = c;
 400     schedPrio.updateTime(c);
 401   }
 402
 403   //----------------------------------------------------------------------
 404   // Functions to manage the choices for the current cycle including:
 405   // -- a vector of choices by priority (choiceVec)
 406   // -- vectors of the choices for each instruction slot (choicesForSlot[])
 407   // -- number of choices in each sched class, used to check issue conflicts
 408   //    between choices for a single cycle
 409   //----------------------------------------------------------------------
 410
 411   inline unsigned int getNumChoices     () const {
 412     return choiceVec.size();
 413   }
 414
 415   inline unsigned getNumChoicesInClass  (const InstrSchedClass& sc) const {
 416     assert(sc < (int) numInClass.size() && "Invalid op code or sched class!");
 417     return numInClass[sc];
 418   }
 419
 420   inline const SchedGraphNode* getChoice(unsigned int i) const {
 421     // assert(i < choiceVec.size());    don't check here.
 422     return choiceVec[i];
 423   }
 424
 425   inline std::hash_set<const SchedGraphNode*>& getChoicesForSlot(unsigned slotNum) {
 426     assert(slotNum < nslots);
 427     return choicesForSlot[slotNum];
 428   }
 429
 430   inline void   addChoice               (const SchedGraphNode* node) {
 431     // Append the instruction to the vector of choices for current cycle.
 432     // Increment numInClass[c] for the sched class to which the instr belongs.
 433     choiceVec.push_back(node);
 434     const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpCode());
 435     assert(sc < (int) numInClass.size());
 436     numInClass[sc]++;
 437   }
 438
 439   inline void   addChoiceToSlot         (unsigned int slotNum,
 440                                          const SchedGraphNode* node) {
 441     // Add the instruction to the choice set for the specified slot
 442     assert(slotNum < nslots);
 443     choicesForSlot[slotNum].insert(node);
 444   }
 445
 446   inline void   resetChoices            () {
 447     choiceVec.clear();
 448     for (unsigned int s=0; s < nslots; s++)
 449       choicesForSlot[s].clear();
 450     for (unsigned int c=0; c < numInClass.size(); c++)
 451       numInClass[c] = 0;
 452   }
 453
 454   //----------------------------------------------------------------------
 455   // Code to query and manage the partial instruction schedule so far
 456   //----------------------------------------------------------------------
 457
 458   inline unsigned int   getNumScheduled () const {
 459     return isched.getNumInstructions();
 460   }
 461
 462   inline unsigned int   getNumUnscheduled() const {
 463     return totalInstrCount - isched.getNumInstructions();
 464   }
 465
 466   inline bool           isScheduled     (const SchedGraphNode* node) const {
 467     return (isched.getStartTime(node->getNodeId()) >= 0);
 468   }
 469
 470   inline void   scheduleInstr           (const SchedGraphNode* node,
 471                                          unsigned int slotNum,
 472                                          cycles_t cycle)
 473   {
 474     assert(! isScheduled(node) && "Instruction already scheduled?");
 475
 476     // add the instruction to the schedule
 477     isched.scheduleInstr(node, slotNum, cycle);
 478
 479     // update the earliest start times of all nodes that conflict with `node'
 480     // and the next-earliest time anything can issue if `node' causes bubbles
 481     updateEarliestStartTimes(node, cycle);
 482
 483     // remove the instruction from the choice sets for all slots
 484     for (unsigned s=0; s < nslots; s++)
 485       choicesForSlot[s].erase(node);
 486
 487     // and decrement the instr count for the sched class to which it belongs
 488     const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpCode());
 489     assert(sc < (int) numInClass.size());
 490     numInClass[sc]--;
 491   }
 492
 493   //----------------------------------------------------------------------
 494   // Create and retrieve delay slot info for delayed instructions
 495   //----------------------------------------------------------------------
 496
 497   inline DelaySlotInfo* getDelaySlotInfoForInstr(const SchedGraphNode* bn,
 498                                                  bool createIfMissing=false)
 499   {
 500     DelaySlotInfo* dinfo;
 501     std::hash_map<const SchedGraphNode*, DelaySlotInfo* >::const_iterator
 502       I = delaySlotInfoForBranches.find(bn);
 503     if (I == delaySlotInfoForBranches.end())
 504       {
 505         if (createIfMissing)
 506           {
 507             dinfo = new DelaySlotInfo(bn,
 508                           getInstrInfo().getNumDelaySlots(bn->getOpCode()));
 509             delaySlotInfoForBranches[bn] = dinfo;
 510           }
 511         else
 512           dinfo = NULL;
 513       }
 514     else
 515       dinfo = (*I).second;
 516
 517     return dinfo;
 518   }
 519
 520 private:
 521   /*ctor*/      SchedulingManager       ();     // Disable: DO NOT IMPLEMENT.
 522   void          updateEarliestStartTimes(const SchedGraphNode* node,
 523                                          cycles_t schedTime);
 524 };
 525
 526
 527 /*ctor*/
 528 SchedulingManager::SchedulingManager(const TargetMachine& target,
 529                                      const SchedGraph* graph,
 530                                      SchedPriorities& _schedPrio)
 531   : nslots(target.getSchedInfo().getMaxNumIssueTotal()),
 532     schedInfo(target.getSchedInfo()),
 533     schedPrio(_schedPrio),
 534     isched(nslots, graph->getNumNodes()),
 535     totalInstrCount(graph->getNumNodes() - 2),
 536     nextEarliestIssueTime(0),
 537     choicesForSlot(nslots),
 538     numInClass(target.getSchedInfo().getNumSchedClasses(), 0),  // set all to 0
 539     nextEarliestStartTime(target.getInstrInfo().getNumRealOpCodes(),
 540                           (cycles_t) 0)                         // set all to 0
 541 {
 542   updateTime(0);
 543
 544   // Note that an upper bound on #choices for each slot is = nslots since
 545   // we use this vector to hold a feasible set of instructions, and more
 546   // would be infeasible. Reserve that much memory since it is probably small.
 547   for (unsigned int i=0; i < nslots; i++)
 548     choicesForSlot[i].resize(nslots);
 549 }
 550
 551
 552 void
 553 SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node,
 554                                             cycles_t schedTime)
 555 {
 556   if (schedInfo.numBubblesAfter(node->getOpCode()) > 0)
 557     { // Update next earliest time before which *nothing* can issue.
 558       nextEarliestIssueTime = std::max(nextEarliestIssueTime,
 559                   curTime + 1 + schedInfo.numBubblesAfter(node->getOpCode()));
 560     }
 561
 562   const vector<MachineOpCode>*
 563     conflictVec = schedInfo.getConflictList(node->getOpCode());
 564
 565   if (conflictVec != NULL)
 566     for (unsigned i=0; i < conflictVec->size(); i++)
 567       {
 568         MachineOpCode toOp = (*conflictVec)[i];
 569         cycles_t est = schedTime + schedInfo.getMinIssueGap(node->getOpCode(),
 570                                                             toOp);
 571         assert(toOp < (int) nextEarliestStartTime.size());
 572         if (nextEarliestStartTime[toOp] < est)
 573           nextEarliestStartTime[toOp] = est;
 574       }
 575 }
 576
 577 //************************* Internal Functions *****************************/
 578
 579
 580 static void
 581 AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
 582 {
 583   // find the slot to start from, in the current cycle
 584   unsigned int startSlot = 0;
 585   cycles_t curTime = S.getTime();
 586
 587   assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
 588
 589   // If only one instruction can be issued, do so.
 590   if (maxIssue == 1)
 591     for (unsigned s=startSlot; s < S.nslots; s++)
 592       if (S.getChoicesForSlot(s).size() > 0)
 593         {// found the one instruction
 594           S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
 595           return;
 596         }
 597
 598   // Otherwise, choose from the choices for each slot
 599   //
 600   InstrGroup* igroup = S.isched.getIGroup(S.getTime());
 601   assert(igroup != NULL && "Group creation failed?");
 602
 603   // Find a slot that has only a single choice, and take it.
 604   // If all slots have 0 or multiple choices, pick the first slot with
 605   // choices and use its last instruction (just to avoid shifting the vector).
 606   unsigned numIssued;
 607   for (numIssued = 0; numIssued < maxIssue; numIssued++)
 608     {
 609       int chosenSlot = -1;
 610       for (unsigned s=startSlot; s < S.nslots; s++)
 611         if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
 612           {
 613             chosenSlot = (int) s;
 614             break;
 615           }
 616
 617       if (chosenSlot == -1)
 618         for (unsigned s=startSlot; s < S.nslots; s++)
 619           if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0)
 620             {
 621               chosenSlot = (int) s;
 622               break;
 623             }
 624
 625       if (chosenSlot != -1)
 626         { // Insert the chosen instr in the chosen slot and
 627           // erase it from all slots.
 628           const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
 629           S.scheduleInstr(node, chosenSlot, curTime);
 630         }
 631     }
 632
 633   assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
 634 }
 635
 636
 637 //
 638 // For now, just assume we are scheduling within a single basic block.
 639 // Get the machine instruction vector for the basic block and clear it,
 640 // then append instructions in scheduled order.
 641 // Also, re-insert the dummy PHI instructions that were at the beginning
 642 // of the basic block, since they are not part of the schedule.
 643 //
 644 static void
 645 RecordSchedule(const BasicBlock* bb, const SchedulingManager& S)
 646 {
 647   MachineCodeForBasicBlock& mvec = bb->getMachineInstrVec();
 648   const MachineInstrInfo& mii = S.schedInfo.getInstrInfo();
 649
 650 #ifndef NDEBUG
 651   // Lets make sure we didn't lose any instructions, except possibly
 652   // some NOPs from delay slots.  Also, PHIs are not included in the schedule.
 653   unsigned numInstr = 0;
 654   for (MachineCodeForBasicBlock::iterator I=mvec.begin(); I != mvec.end(); ++I)
 655     if (! mii.isNop((*I)->getOpCode()) &&
 656         ! mii.isDummyPhiInstr((*I)->getOpCode()))
 657       ++numInstr;
 658   assert(S.isched.getNumInstructions() >= numInstr &&
 659          "Lost some non-NOP instructions during scheduling!");
 660 #endif
 661
 662   if (S.isched.getNumInstructions() == 0)
 663     return;                             // empty basic block!
 664
 665   // First find the dummy instructions at the start of the basic block
 666   MachineCodeForBasicBlock::iterator I = mvec.begin();
 667   for ( ; I != mvec.end(); ++I)
 668     if (! mii.isDummyPhiInstr((*I)->getOpCode()))
 669       break;
 670
 671   // Erase all except the dummy PHI instructions from mvec, and
 672   // pre-allocate create space for the ones we will put back in.
 673   mvec.erase(I, mvec.end());
 674   mvec.reserve(mvec.size() + S.isched.getNumInstructions());
 675
 676   InstrSchedule::const_iterator NIend = S.isched.end();
 677   for (InstrSchedule::const_iterator NI = S.isched.begin(); NI != NIend; ++NI)
 678     mvec.push_back(const_cast<MachineInstr*>((*NI)->getMachineInstr()));
 679 }
 680
 681
 682
 683 static void
 684 MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node)
 685 {
 686   // Check if any successors are now ready that were not already marked
 687   // ready before, and that have not yet been scheduled.
 688   //
 689   for (sg_succ_const_iterator SI = succ_begin(node); SI !=succ_end(node); ++SI)
 690     if (! (*SI)->isDummyNode()
 691         && ! S.isScheduled(*SI)
 692         && ! S.schedPrio.nodeIsReady(*SI))
 693       {// successor not scheduled and not marked ready; check *its* preds.
 694
 695         bool succIsReady = true;
 696         for (sg_pred_const_iterator P=pred_begin(*SI); P != pred_end(*SI); ++P)
 697           if (! (*P)->isDummyNode()
 698               && ! S.isScheduled(*P))
 699             {
 700               succIsReady = false;
 701               break;
 702             }
 703
 704         if (succIsReady)        // add the successor to the ready list
 705           S.schedPrio.insertReady(*SI);
 706       }
 707 }
 708
 709
 710 // Choose up to `nslots' FEASIBLE instructions and assign each
 711 // instruction to all possible slots that do not violate feasibility.
 712 // FEASIBLE means it should be guaranteed that the set
 713 // of chosen instructions can be issued in a single group.
 714 //
 715 // Return value:
 716 //      maxIssue : total number of feasible instructions
 717 //      S.choicesForSlot[i=0..nslots] : set of instructions feasible in slot i
 718 //
 719 static unsigned
 720 FindSlotChoices(SchedulingManager& S,
 721                 DelaySlotInfo*& getDelaySlotInfo)
 722 {
 723   // initialize result vectors to empty
 724   S.resetChoices();
 725
 726   // find the slot to start from, in the current cycle
 727   unsigned int startSlot = 0;
 728   InstrGroup* igroup = S.isched.getIGroup(S.getTime());
 729   for (int s = S.nslots - 1; s >= 0; s--)
 730     if ((*igroup)[s] != NULL)
 731       {
 732         startSlot = s+1;
 733         break;
 734       }
 735
 736   // Make sure we pick at most one instruction that would break the group.
 737   // Also, if we do pick one, remember which it was.
 738   unsigned int indexForBreakingNode = S.nslots;
 739   unsigned int indexForDelayedInstr = S.nslots;
 740   DelaySlotInfo* delaySlotInfo = NULL;
 741
 742   getDelaySlotInfo = NULL;
 743
 744   // Choose instructions in order of priority.
 745   // Add choices to the choice vector in the SchedulingManager class as
 746   // we choose them so that subsequent choices will be correctly tested
 747   // for feasibility, w.r.t. higher priority choices for the same cycle.
 748   //
 749   while (S.getNumChoices() < S.nslots - startSlot)
 750     {
 751       const SchedGraphNode* nextNode=S.schedPrio.getNextHighest(S,S.getTime());
 752       if (nextNode == NULL)
 753         break;                  // no more instructions for this cycle
 754
 755       if (S.getInstrInfo().getNumDelaySlots(nextNode->getOpCode()) > 0)
 756         {
 757           delaySlotInfo = S.getDelaySlotInfoForInstr(nextNode);
 758           if (delaySlotInfo != NULL)
 759             {
 760               if (indexForBreakingNode < S.nslots)
 761                 // cannot issue a delayed instr in the same cycle as one
 762                 // that breaks the issue group or as another delayed instr
 763                 nextNode = NULL;
 764               else
 765                 indexForDelayedInstr = S.getNumChoices();
 766             }
 767         }
 768       else if (S.schedInfo.breaksIssueGroup(nextNode->getOpCode()))
 769         {
 770           if (indexForBreakingNode < S.nslots)
 771             // have a breaking instruction already so throw this one away
 772             nextNode = NULL;
 773           else
 774             indexForBreakingNode = S.getNumChoices();
 775         }
 776
 777       if (nextNode != NULL)
 778         {
 779           S.addChoice(nextNode);
 780
 781           if (S.schedInfo.isSingleIssue(nextNode->getOpCode()))
 782             {
 783               assert(S.getNumChoices() == 1 &&
 784                      "Prioritizer returned invalid instr for this cycle!");
 785               break;
 786             }
 787         }
 788
 789       if (indexForDelayedInstr < S.nslots)
 790         break;                  // leave the rest for delay slots
 791     }
 792
 793   assert(S.getNumChoices() <= S.nslots);
 794   assert(! (indexForDelayedInstr < S.nslots &&
 795             indexForBreakingNode < S.nslots) && "Cannot have both in a cycle");
 796
 797   // Assign each chosen instruction to all possible slots for that instr.
 798   // But if only one instruction was chosen, put it only in the first
 799   // feasible slot; no more analysis will be needed.
 800   //
 801   if (indexForDelayedInstr >= S.nslots &&
 802       indexForBreakingNode >= S.nslots)
 803     { // No instructions that break the issue group or that have delay slots.
 804       // This is the common case, so handle it separately for efficiency.
 805
 806       if (S.getNumChoices() == 1)
 807         {
 808           MachineOpCode opCode = S.getChoice(0)->getOpCode();
 809           unsigned int s;
 810           for (s=startSlot; s < S.nslots; s++)
 811             if (S.schedInfo.instrCanUseSlot(opCode, s))
 812               break;
 813           assert(s < S.nslots && "No feasible slot for this opCode?");
 814           S.addChoiceToSlot(s, S.getChoice(0));
 815         }
 816       else
 817         {
 818           for (unsigned i=0; i < S.getNumChoices(); i++)
 819             {
 820               MachineOpCode opCode = S.getChoice(i)->getOpCode();
 821               for (unsigned int s=startSlot; s < S.nslots; s++)
 822                 if (S.schedInfo.instrCanUseSlot(opCode, s))
 823                   S.addChoiceToSlot(s, S.getChoice(i));
 824             }
 825         }
 826     }
 827   else if (indexForDelayedInstr < S.nslots)
 828     {
 829       // There is an instruction that needs delay slots.
 830       // Try to assign that instruction to a higher slot than any other
 831       // instructions in the group, so that its delay slots can go
 832       // right after it.
 833       //
 834
 835       assert(indexForDelayedInstr == S.getNumChoices() - 1 &&
 836              "Instruction with delay slots should be last choice!");
 837       assert(delaySlotInfo != NULL && "No delay slot info for instr?");
 838
 839       const SchedGraphNode* delayedNode = S.getChoice(indexForDelayedInstr);
 840       MachineOpCode delayOpCode = delayedNode->getOpCode();
 841       unsigned ndelays= S.getInstrInfo().getNumDelaySlots(delayOpCode);
 842
 843       unsigned delayedNodeSlot = S.nslots;
 844       int highestSlotUsed;
 845
 846       // Find the last possible slot for the delayed instruction that leaves
 847       // at least `d' slots vacant after it (d = #delay slots)
 848       for (int s = S.nslots-ndelays-1; s >= (int) startSlot; s--)
 849         if (S.schedInfo.instrCanUseSlot(delayOpCode, s))
 850           {
 851             delayedNodeSlot = s;
 852             break;
 853           }
 854
 855       highestSlotUsed = -1;
 856       for (unsigned i=0; i < S.getNumChoices() - 1; i++)
 857         {
 858           // Try to assign every other instruction to a lower numbered
 859           // slot than delayedNodeSlot.
 860           MachineOpCode opCode =S.getChoice(i)->getOpCode();
 861           bool noSlotFound = true;
 862           unsigned int s;
 863           for (s=startSlot; s < delayedNodeSlot; s++)
 864             if (S.schedInfo.instrCanUseSlot(opCode, s))
 865               {
 866                 S.addChoiceToSlot(s, S.getChoice(i));
 867                 noSlotFound = false;
 868               }
 869
 870           // No slot before `delayedNodeSlot' was found for this opCode
 871           // Use a later slot, and allow some delay slots to fall in
 872           // the next cycle.
 873           if (noSlotFound)
 874             for ( ; s < S.nslots; s++)
 875               if (S.schedInfo.instrCanUseSlot(opCode, s))
 876                 {
 877                   S.addChoiceToSlot(s, S.getChoice(i));
 878                   break;
 879                 }
 880
 881           assert(s < S.nslots && "No feasible slot for instruction?");
 882
 883           highestSlotUsed = std::max(highestSlotUsed, (int) s);
 884         }
 885
 886       assert(highestSlotUsed <= (int) S.nslots-1 && "Invalid slot used?");
 887
 888       // We will put the delayed node in the first slot after the
 889       // highest slot used.  But we just mark that for now, and
 890       // schedule it separately because we want to schedule the delay
 891       // slots for the node at the same time.
 892       cycles_t dcycle = S.getTime();
 893       unsigned int dslot = highestSlotUsed + 1;
 894       if (dslot == S.nslots)
 895         {
 896           dslot = 0;
 897           ++dcycle;
 898         }
 899       delaySlotInfo->recordChosenSlot(dcycle, dslot);
 900       getDelaySlotInfo = delaySlotInfo;
 901     }
 902   else
 903     { // There is an instruction that breaks the issue group.
 904       // For such an instruction, assign to the last possible slot in
 905       // the current group, and then don't assign any other instructions
 906       // to later slots.
 907       assert(indexForBreakingNode < S.nslots);
 908       const SchedGraphNode* breakingNode=S.getChoice(indexForBreakingNode);
 909       unsigned breakingSlot = INT_MAX;
 910       unsigned int nslotsToUse = S.nslots;
 911
 912       // Find the last possible slot for this instruction.
 913       for (int s = S.nslots-1; s >= (int) startSlot; s--)
 914         if (S.schedInfo.instrCanUseSlot(breakingNode->getOpCode(), s))
 915           {
 916             breakingSlot = s;
 917             break;
 918           }
 919       assert(breakingSlot < S.nslots &&
 920              "No feasible slot for `breakingNode'?");
 921
 922       // Higher priority instructions than the one that breaks the group:
 923       // These can be assigned to all slots, but will be assigned only
 924       // to earlier slots if possible.
 925       for (unsigned i=0;
 926            i < S.getNumChoices() && i < indexForBreakingNode; i++)
 927         {
 928           MachineOpCode opCode =S.getChoice(i)->getOpCode();
 929
 930           // If a higher priority instruction cannot be assigned to
 931           // any earlier slots, don't schedule the breaking instruction.
 932           //
 933           bool foundLowerSlot = false;
 934           nslotsToUse = S.nslots;           // May be modified in the loop
 935           for (unsigned int s=startSlot; s < nslotsToUse; s++)
 936             if (S.schedInfo.instrCanUseSlot(opCode, s))
 937               {
 938                 if (breakingSlot < S.nslots && s < breakingSlot)
 939                   {
 940                     foundLowerSlot = true;
 941                     nslotsToUse = breakingSlot; // RESETS LOOP UPPER BOUND!
 942                   }
 943
 944                 S.addChoiceToSlot(s, S.getChoice(i));
 945               }
 946
 947           if (!foundLowerSlot)
 948             breakingSlot = INT_MAX;             // disable breaking instr
 949         }
 950
 951       // Assign the breaking instruction (if any) to a single slot
 952       // Otherwise, just ignore the instruction.  It will simply be
 953       // scheduled in a later cycle.
 954       if (breakingSlot < S.nslots)
 955         {
 956           S.addChoiceToSlot(breakingSlot, breakingNode);
 957           nslotsToUse = breakingSlot;
 958         }
 959       else
 960         nslotsToUse = S.nslots;
 961
 962       // For lower priority instructions than the one that breaks the
 963       // group, only assign them to slots lower than the breaking slot.
 964       // Otherwise, just ignore the instruction.
 965       for (unsigned i=indexForBreakingNode+1; i < S.getNumChoices(); i++)
 966         {
 967           MachineOpCode opCode = S.getChoice(i)->getOpCode();
 968           for (unsigned int s=startSlot; s < nslotsToUse; s++)
 969             if (S.schedInfo.instrCanUseSlot(opCode, s))
 970               S.addChoiceToSlot(s, S.getChoice(i));
 971         }
 972     } // endif (no delay slots and no breaking slots)
 973
 974   return S.getNumChoices();
 975 }
 976
 977
 978 static unsigned
 979 ChooseOneGroup(SchedulingManager& S)
 980 {
 981   assert(S.schedPrio.getNumReady() > 0
 982          && "Don't get here without ready instructions.");
 983
 984   cycles_t firstCycle = S.getTime();
 985   DelaySlotInfo* getDelaySlotInfo = NULL;
 986
 987   // Choose up to `nslots' feasible instructions and their possible slots.
 988   unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
 989
 990   while (numIssued == 0)
 991     {
 992       S.updateTime(S.getTime()+1);
 993       numIssued = FindSlotChoices(S, getDelaySlotInfo);
 994     }
 995
 996   AssignInstructionsToSlots(S, numIssued);
 997
 998   if (getDelaySlotInfo != NULL)
 999     numIssued += getDelaySlotInfo->scheduleDelayedNode(S);
1000
1001   // Print trace of scheduled instructions before newly ready ones
1002   if (SchedDebugLevel >= Sched_PrintSchedTrace)
1003     {
1004       for (cycles_t c = firstCycle; c <= S.getTime(); c++)
1005         {
1006           cerr << "    Cycle " << (long)c << " : Scheduled instructions:\n";
1007           const InstrGroup* igroup = S.isched.getIGroup(c);
1008           for (unsigned int s=0; s < S.nslots; s++)
1009             {
1010               cerr << "        ";
1011               if ((*igroup)[s] != NULL)
1012                 cerr << * ((*igroup)[s])->getMachineInstr() << "\n";
1013               else
1014                 cerr << "<none>\n";
1015             }
1016         }
1017     }
1018
1019   return numIssued;
1020 }
1021
1022
1023 static void
1024 ForwardListSchedule(SchedulingManager& S)
1025 {
1026   unsigned N;
1027   const SchedGraphNode* node;
1028
1029   S.schedPrio.initialize();
1030
1031   while ((N = S.schedPrio.getNumReady()) > 0)
1032     {
1033       cycles_t nextCycle = S.getTime();
1034
1035       // Choose one group of instructions for a cycle, plus any delay slot
1036       // instructions (which may overflow into successive cycles).
1037       // This will advance S.getTime() to the last cycle in which
1038       // instructions are actually issued.
1039       //
1040       unsigned numIssued = ChooseOneGroup(S);
1041       assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
1042
1043       // Notify the priority manager of scheduled instructions and mark
1044       // any successors that may now be ready
1045       //
1046       for (cycles_t c = nextCycle; c <= S.getTime(); c++)
1047         {
1048           const InstrGroup* igroup = S.isched.getIGroup(c);
1049           for (unsigned int s=0; s < S.nslots; s++)
1050             if ((node = (*igroup)[s]) != NULL)
1051               {
1052                 S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
1053                 MarkSuccessorsReady(S, node);
1054               }
1055         }
1056
1057       // Move to the next the next earliest cycle for which
1058       // an instruction can be issued, or the next earliest in which
1059       // one will be ready, or to the next cycle, whichever is latest.
1060       //
1061       S.updateTime(std::max(S.getTime() + 1,
1062                             std::max(S.getEarliestIssueTime(),
1063                                      S.schedPrio.getEarliestReadyTime())));
1064     }
1065 }
1066
1067
1068 //---------------------------------------------------------------------
1069 // Code for filling delay slots for delayed terminator instructions
1070 // (e.g., BRANCH and RETURN).  Delay slots for non-terminator
1071 // instructions (e.g., CALL) are not handled here because they almost
1072 // always can be filled with instructions from the call sequence code
1073 // before a call.  That's preferable because we incur many tradeoffs here
1074 // when we cannot find single-cycle instructions that can be reordered.
1075 //----------------------------------------------------------------------
1076
1077 static bool
1078 NodeCanFillDelaySlot(const SchedulingManager& S,
1079                      const SchedGraphNode* node,
1080                      const SchedGraphNode* brNode,
1081                      bool nodeIsPredecessor)
1082 {
1083   assert(! node->isDummyNode());
1084
1085   // don't put a branch in the delay slot of another branch
1086   if (S.getInstrInfo().isBranch(node->getOpCode()))
1087     return false;
1088
1089   // don't put a single-issue instruction in the delay slot of a branch
1090   if (S.schedInfo.isSingleIssue(node->getOpCode()))
1091     return false;
1092
1093   // don't put a load-use dependence in the delay slot of a branch
1094   const MachineInstrInfo& mii = S.getInstrInfo();
1095
1096   for (SchedGraphNode::const_iterator EI = node->beginInEdges();
1097        EI != node->endInEdges(); ++EI)
1098     if (! (*EI)->getSrc()->isDummyNode()
1099         && mii.isLoad((*EI)->getSrc()->getOpCode())
1100         && (*EI)->getDepType() == SchedGraphEdge::CtrlDep)
1101       return false;
1102
1103   // for now, don't put an instruction that does not have operand
1104   // interlocks in the delay slot of a branch
1105   if (! S.getInstrInfo().hasOperandInterlock(node->getOpCode()))
1106     return false;
1107
1108   // Finally, if the instruction preceeds the branch, we make sure the
1109   // instruction can be reordered relative to the branch.  We simply check
1110   // if the instr. has only 1 outgoing edge, viz., a CD edge to the branch.
1111   //
1112   if (nodeIsPredecessor)
1113     {
1114       bool onlyCDEdgeToBranch = true;
1115       for (SchedGraphNode::const_iterator OEI = node->beginOutEdges();
1116            OEI != node->endOutEdges(); ++OEI)
1117         if (! (*OEI)->getSink()->isDummyNode()
1118             && ((*OEI)->getSink() != brNode
1119                 || (*OEI)->getDepType() != SchedGraphEdge::CtrlDep))
1120           {
1121             onlyCDEdgeToBranch = false;
1122             break;
1123           }
1124
1125       if (!onlyCDEdgeToBranch)
1126         return false;
1127     }
1128
1129   return true;
1130 }
1131
1132
1133 static void
1134 MarkNodeForDelaySlot(SchedulingManager& S,
1135                      SchedGraph* graph,
1136                      SchedGraphNode* node,
1137                      const SchedGraphNode* brNode,
1138                      bool nodeIsPredecessor)
1139 {
1140   if (nodeIsPredecessor)
1141     { // If node is in the same basic block (i.e., preceeds brNode),
1142       // remove it and all its incident edges from the graph.  Make sure we
1143       // add dummy edges for pred/succ nodes that become entry/exit nodes.
1144       graph->eraseIncidentEdges(node, /*addDummyEdges*/ true);
1145     }
1146   else
1147     { // If the node was from a target block, add the node to the graph
1148       // and add a CD edge from brNode to node.
1149       assert(0 && "NOT IMPLEMENTED YET");
1150     }
1151
1152   DelaySlotInfo* dinfo = S.getDelaySlotInfoForInstr(brNode, /*create*/ true);
1153   dinfo->addDelayNode(node);
1154 }
1155
1156
1157 void
1158 FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
1159                                     SchedGraphNode* brNode,
1160                                     vector<SchedGraphNode*>& sdelayNodeVec)
1161 {
1162   const MachineInstrInfo& mii = S.getInstrInfo();
1163   unsigned ndelays =
1164     mii.getNumDelaySlots(brNode->getOpCode());
1165
1166   if (ndelays == 0)
1167     return;
1168
1169   sdelayNodeVec.reserve(ndelays);
1170
1171   // Use a separate vector to hold the feasible multi-cycle nodes.
1172   // These will be used if not enough single-cycle nodes are found.
1173   //
1174   vector<SchedGraphNode*> mdelayNodeVec;
1175
1176   for (sg_pred_iterator P = pred_begin(brNode);
1177        P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
1178     if (! (*P)->isDummyNode() &&
1179         ! mii.isNop((*P)->getOpCode()) &&
1180         NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
1181       {
1182         if (mii.maxLatency((*P)->getOpCode()) > 1)
1183           mdelayNodeVec.push_back(*P);
1184         else
1185           sdelayNodeVec.push_back(*P);
1186       }
1187
1188   // If not enough single-cycle instructions were found, select the
1189   // lowest-latency multi-cycle instructions and use them.
1190   // Note that this is the most efficient code when only 1 (or even 2)
1191   // values need to be selected.
1192   //
1193   while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
1194     {
1195       unsigned lmin =
1196         mii.maxLatency(mdelayNodeVec[0]->getOpCode());
1197       unsigned minIndex   = 0;
1198       for (unsigned i=1; i < mdelayNodeVec.size(); i++)
1199         {
1200           unsigned li =
1201             mii.maxLatency(mdelayNodeVec[i]->getOpCode());
1202           if (lmin >= li)
1203             {
1204               lmin = li;
1205               minIndex = i;
1206             }
1207         }
1208       sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
1209       if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
1210         mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
1211     }
1212 }
1213
1214
1215 // Remove the NOPs currently in delay slots from the graph.
1216 // Mark instructions specified in sdelayNodeVec to replace them.
1217 // If not enough useful instructions were found, mark the NOPs to be used
1218 // for filling delay slots, otherwise, otherwise just discard them.
1219 //
1220 void
1221 ReplaceNopsWithUsefulInstr(SchedulingManager& S,
1222                            SchedGraphNode* node,
1223                            vector<SchedGraphNode*> sdelayNodeVec,
1224                            SchedGraph* graph)
1225 {
1226   vector<SchedGraphNode*> nopNodeVec;   // this will hold unused NOPs
1227   const MachineInstrInfo& mii = S.getInstrInfo();
1228   const MachineInstr* brInstr = node->getMachineInstr();
1229   unsigned ndelays= mii.getNumDelaySlots(brInstr->getOpCode());
1230   assert(ndelays > 0 && "Unnecessary call to replace NOPs");
1231
1232   // Remove the NOPs currently in delay slots from the graph.
1233   // If not enough useful instructions were found, use the NOPs to
1234   // fill delay slots, otherwise, just discard them.
1235   //
1236   unsigned int firstDelaySlotIdx = node->getOrigIndexInBB() + 1;
1237   MachineCodeForBasicBlock& bbMvec  = node->getBB()->getMachineInstrVec();
1238   assert(bbMvec[firstDelaySlotIdx - 1] == brInstr &&
1239          "Incorrect instr. index in basic block for brInstr");
1240
1241   // First find all useful instructions already in the delay slots
1242   // and USE THEM.  We'll throw away the unused alternatives below
1243   //
1244   for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx + ndelays; ++i)
1245     if (! mii.isNop(bbMvec[i]->getOpCode()))
1246       sdelayNodeVec.insert(sdelayNodeVec.begin(),
1247                            graph->getGraphNodeForInstr(bbMvec[i]));
1248
1249   // Then find the NOPs and keep only as many as are needed.
1250   // Put the rest in nopNodeVec to be deleted.
1251   for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx + ndelays; ++i)
1252     if (mii.isNop(bbMvec[i]->getOpCode()))
1253       if (sdelayNodeVec.size() < ndelays)
1254         sdelayNodeVec.push_back(graph->getGraphNodeForInstr(bbMvec[i]));
1255       else
1256         nopNodeVec.push_back(graph->getGraphNodeForInstr(bbMvec[i]));
1257
1258   assert(sdelayNodeVec.size() >= ndelays);
1259
1260   // If some delay slots were already filled, throw away that many new choices
1261   if (sdelayNodeVec.size() > ndelays)
1262     sdelayNodeVec.resize(ndelays);
1263
1264   // Mark the nodes chosen for delay slots.  This removes them from the graph.
1265   for (unsigned i=0; i < sdelayNodeVec.size(); i++)
1266     MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true);
1267
1268   // And remove the unused NOPs from the graph.
1269   for (unsigned i=0; i < nopNodeVec.size(); i++)
1270     graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
1271 }
1272
1273
1274 // For all delayed instructions, choose instructions to put in the delay
1275 // slots and pull those out of the graph.  Mark them for the delay slots
1276 // in the DelaySlotInfo object for that graph node.  If no useful work
1277 // is found for a delay slot, use the NOP that is currently in that slot.
1278 //
1279 // We try to fill the delay slots with useful work for all instructions
1280 // EXCEPT CALLS AND RETURNS.
1281 // For CALLs and RETURNs, it is nearly always possible to use one of the
1282 // call sequence instrs and putting anything else in the delay slot could be
1283 // suboptimal.  Also, it complicates generating the calling sequence code in
1284 // regalloc.
1285 //
1286 static void
1287 ChooseInstructionsForDelaySlots(SchedulingManager& S,
1288                                 const BasicBlock *bb,
1289                                 SchedGraph *graph)
1290 {
1291   const MachineInstrInfo& mii = S.getInstrInfo();
1292   const Instruction *termInstr = (Instruction*)bb->getTerminator();
1293   MachineCodeForInstruction &termMvec=MachineCodeForInstruction::get(termInstr);
1294   vector<SchedGraphNode*> delayNodeVec;
1295   const MachineInstr* brInstr = NULL;
1296
1297   if (termInstr->getOpcode() != Instruction::Ret)
1298     {
1299       // To find instructions that need delay slots without searching the full
1300       // machine code, we assume that the only delayed instructions are CALLs
1301       // or instructions generated for the terminator inst.
1302       // Find the first branch instr in the sequence of machine instrs for term
1303       //
1304       unsigned first = 0;
1305       while (first < termMvec.size() &&
1306              ! mii.isBranch(termMvec[first]->getOpCode()))
1307         {
1308           ++first;
1309         }
1310       assert(first < termMvec.size() &&
1311          "No branch instructions for BR?  Ok, but weird!  Delete assertion.");
1312
1313       brInstr = (first < termMvec.size())? termMvec[first] : NULL;
1314
1315       // Compute a vector of the nodes chosen for delay slots and then
1316       // mark delay slots to replace NOPs with these useful instructions.
1317       //
1318       if (brInstr != NULL)
1319         {
1320           SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr);
1321           FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec);
1322           ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph);
1323         }
1324     }
1325
1326   // Also mark delay slots for other delayed instructions to hold NOPs.
1327   // Simply passing in an empty delayNodeVec will have this effect.
1328   //
1329   delayNodeVec.clear();
1330   const MachineCodeForBasicBlock& bbMvec = bb->getMachineInstrVec();
1331   for (unsigned i=0; i < bbMvec.size(); i++)
1332     if (bbMvec[i] != brInstr &&
1333         mii.getNumDelaySlots(bbMvec[i]->getOpCode()) > 0)
1334       {
1335         SchedGraphNode* node = graph->getGraphNodeForInstr(bbMvec[i]);
1336         ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph);
1337       }
1338 }
1339
1340
1341 //
1342 // Schedule the delayed branch and its delay slots
1343 //
1344 unsigned
1345 DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
1346 {
1347   assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch");
1348   assert(S.isched.getInstr(delayedNodeSlotNum, delayedNodeCycle) == NULL
1349          && "Slot for branch should be empty");
1350
1351   unsigned int nextSlot = delayedNodeSlotNum;
1352   cycles_t nextTime = delayedNodeCycle;
1353
1354   S.scheduleInstr(brNode, nextSlot, nextTime);
1355
1356   for (unsigned d=0; d < ndelays; d++)
1357     {
1358       ++nextSlot;
1359       if (nextSlot == S.nslots)
1360         {
1361           nextSlot = 0;
1362           nextTime++;
1363         }
1364
1365       // Find the first feasible instruction for this delay slot
1366       // Note that we only check for issue restrictions here.
1367       // We do *not* check for flow dependences but rely on pipeline
1368       // interlocks to resolve them.  Machines without interlocks
1369       // will require this code to be modified.
1370       for (unsigned i=0; i < delayNodeVec.size(); i++)
1371         {
1372           const SchedGraphNode* dnode = delayNodeVec[i];
1373           if ( ! S.isScheduled(dnode)
1374                && S.schedInfo.instrCanUseSlot(dnode->getOpCode(), nextSlot)
1375                && instrIsFeasible(S, dnode->getOpCode()))
1376             {
1377               assert(S.getInstrInfo().hasOperandInterlock(dnode->getOpCode())
1378                      && "Instructions without interlocks not yet supported "
1379                      "when filling branch delay slots");
1380               S.scheduleInstr(dnode, nextSlot, nextTime);
1381               break;
1382             }
1383         }
1384     }
1385
1386   // Update current time if delay slots overflowed into later cycles.
1387   // Do this here because we know exactly which cycle is the last cycle
1388   // that contains delay slots.  The next loop doesn't compute that.
1389   if (nextTime > S.getTime())
1390     S.updateTime(nextTime);
1391
1392   // Now put any remaining instructions in the unfilled delay slots.
1393   // This could lead to suboptimal performance but needed for correctness.
1394   nextSlot = delayedNodeSlotNum;
1395   nextTime = delayedNodeCycle;
1396   for (unsigned i=0; i < delayNodeVec.size(); i++)
1397     if (! S.isScheduled(delayNodeVec[i]))
1398       {
1399         do { // find the next empty slot
1400           ++nextSlot;
1401           if (nextSlot == S.nslots)
1402             {
1403               nextSlot = 0;
1404               nextTime++;
1405             }
1406         } while (S.isched.getInstr(nextSlot, nextTime) != NULL);
1407
1408         S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime);
1409         break;
1410       }
1411
1412   return 1 + ndelays;
1413 }
1414
1415
1416 // Check if the instruction would conflict with instructions already
1417 // chosen for the current cycle
1418 //
1419 static inline bool
1420 ConflictsWithChoices(const SchedulingManager& S,
1421                      MachineOpCode opCode)
1422 {
1423   // Check if the instruction must issue by itself, and some feasible
1424   // choices have already been made for this cycle
1425   if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
1426     return true;
1427
1428   // For each class that opCode belongs to, check if there are too many
1429   // instructions of that class.
1430   //
1431   const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
1432   return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
1433 }
1434
1435
1436 //************************* External Functions *****************************/
1437
1438
1439 //---------------------------------------------------------------------------
1440 // Function: ViolatesMinimumGap
1441 //
1442 // Purpose:
1443 //   Check minimum gap requirements relative to instructions scheduled in
1444 //   previous cycles.
1445 //   Note that we do not need to consider `nextEarliestIssueTime' here because
1446 //   that is also captured in the earliest start times for each opcode.
1447 //---------------------------------------------------------------------------
1448
1449 static inline bool
1450 ViolatesMinimumGap(const SchedulingManager& S,
1451                    MachineOpCode opCode,
1452                    const cycles_t inCycle)
1453 {
1454   return (inCycle < S.getEarliestStartTimeForOp(opCode));
1455 }
1456
1457
1458 //---------------------------------------------------------------------------
1459 // Function: instrIsFeasible
1460 //
1461 // Purpose:
1462 //   Check if any issue restrictions would prevent the instruction from
1463 //   being issued in the current cycle
1464 //---------------------------------------------------------------------------
1465
1466 bool
1467 instrIsFeasible(const SchedulingManager& S,
1468                 MachineOpCode opCode)
1469 {
1470   // skip the instruction if it cannot be issued due to issue restrictions
1471   // caused by previously issued instructions
1472   if (ViolatesMinimumGap(S, opCode, S.getTime()))
1473     return false;
1474
1475   // skip the instruction if it cannot be issued due to issue restrictions
1476   // caused by previously chosen instructions for the current cycle
1477   if (ConflictsWithChoices(S, opCode))
1478     return false;
1479
1480   return true;
1481 }
1482
1483 //---------------------------------------------------------------------------
1484 // Function: ScheduleInstructionsWithSSA
1485 //
1486 // Purpose:
1487 //   Entry point for instruction scheduling on SSA form.
1488 //   Schedules the machine instructions generated by instruction selection.
1489 //   Assumes that register allocation has not been done, i.e., operands
1490 //   are still in SSA form.
1491 //---------------------------------------------------------------------------
1492
1493 namespace {
1494   class InstructionSchedulingWithSSA : public MethodPass {
1495     const TargetMachine &target;
1496   public:
1497     inline InstructionSchedulingWithSSA(const TargetMachine &T) : target(T) {}
1498
1499     // getAnalysisUsageInfo - We use LiveVarInfo...
1500     virtual void getAnalysisUsageInfo(Pass::AnalysisSet &Requires,
1501                                       Pass::AnalysisSet &Destroyed,
1502                                       Pass::AnalysisSet &Provided) {
1503       Requires.push_back(MethodLiveVarInfo::ID);
1504       Destroyed.push_back(MethodLiveVarInfo::ID);
1505     }
1506
1507     bool runOnMethod(Method *M);
1508   };
1509 } // end anonymous namespace
1510
1511
1512 bool
1513 InstructionSchedulingWithSSA::runOnMethod(Method *M)
1514 {
1515   if (SchedDebugLevel == Sched_Disable)
1516     return false;
1517
1518   SchedGraphSet graphSet(M, target);
1519
1520   if (SchedDebugLevel >= Sched_PrintSchedGraphs)
1521     {
1522       cerr << "\n*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING\n";
1523       graphSet.dump();
1524     }
1525
1526   for (SchedGraphSet::const_iterator GI=graphSet.begin(), GE=graphSet.end();
1527        GI != GE; ++GI)
1528     {
1529       SchedGraph* graph = (*GI);
1530       const vector<const BasicBlock*> &bbvec = graph->getBasicBlocks();
1531       assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
1532       const BasicBlock* bb = bbvec[0];
1533
1534       if (SchedDebugLevel >= Sched_PrintSchedTrace)
1535         cerr << "\n*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
1536
1537       // expensive!
1538       SchedPriorities schedPrio(M, graph,getAnalysis<MethodLiveVarInfo>());
1539       SchedulingManager S(target, graph, schedPrio);
1540
1541       ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
1542
1543       ForwardListSchedule(S);               // computes schedule in S
1544
1545       RecordSchedule(bb, S);                // records schedule in BB
1546     }
1547
1548   if (SchedDebugLevel >= Sched_PrintMachineCode)
1549     {
1550       cerr << "\n*** Machine instructions after INSTRUCTION SCHEDULING\n";
1551       MachineCodeForMethod::get(M).dump();
1552     }
1553
1554   return false;
1555 }
1556
1557
1558 MethodPass*
1559 createInstructionSchedulingWithSSAPass(const TargetMachine &tgt)
1560 {
1561   return new InstructionSchedulingWithSSA(tgt);
1562 }