Bug fixes:
authorVikram S. Adve <vadve@cs.uiuc.edu>
Sun, 30 Sep 2001 23:43:34 +0000 (23:43 +0000)
committerVikram S. Adve <vadve@cs.uiuc.edu>
Sun, 30 Sep 2001 23:43:34 +0000 (23:43 +0000)
(1) Ensure that delay slot instructions are not moved out of place (this
    was happening for some CALL instructions).  Basically, we need to
    move all delay slot instructions out of the graph and handle them
    along with the delayed control transfer instruction.
(2) Mark scheduled instructions correctly when instructions are scheduled
    in more than one cycle in a single step (due to delay slots).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@678 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/InstrSched/InstrScheduling.cpp
lib/Target/SparcV9/InstrSched/InstrScheduling.cpp

index 34802c9e5bb7ab89575d44596a4a65d25bd086bf..0b194207ca46aaf8ac2016ad60b52eed4a8844f3 100644 (file)
@@ -9,16 +9,26 @@
 //     7/23/01  -  Vikram Adve  -  Created
 //**************************************************************************/
 
+
+//************************* User Include Files *****************************/
+
 #include "llvm/CodeGen/InstrScheduling.h"
 #include "SchedPriorities.h"
 #include "llvm/Analysis/LiveVar/BBLiveVar.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Instruction.h"
+
+
+//************************ System Include Files *****************************/
+
 #include <hash_set>
 #include <algorithm>
 #include <iterator>
 
+
+//************************* External Data Types *****************************/
+
 cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags,
   "enable instruction scheduling debugging information",
   clEnumValN(Sched_NoDebugInfo,      "n", "disable debug output"),
@@ -27,55 +37,12 @@ cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags,
   clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 0);
 
 
+//************************* Internal Data Types *****************************/
+
 class InstrSchedule;
 class SchedulingManager;
 class DelaySlotInfo;
 
-static void    ForwardListSchedule     (SchedulingManager& S);
-
-static void    RecordSchedule          (const BasicBlock* bb,
-                                        const SchedulingManager& S);
-
-static unsigned        ChooseOneGroup          (SchedulingManager& S);
-
-static void    MarkSuccessorsReady     (SchedulingManager& S,
-                                        const SchedGraphNode* node);
-
-static unsigned        FindSlotChoices         (SchedulingManager& S,
-                                        DelaySlotInfo*& getDelaySlotInfo);
-
-static void    AssignInstructionsToSlots(class SchedulingManager& S,
-                                         unsigned maxIssue);
-
-static void    ScheduleInstr           (class SchedulingManager& S,
-                                        const SchedGraphNode* node,
-                                        unsigned int slotNum,
-                                        cycles_t curTime);
-
-static bool    ViolatesMinimumGap      (const SchedulingManager& S,
-                                        MachineOpCode opCode,
-                                        const cycles_t inCycle);
-
-static bool    ConflictsWithChoices    (const SchedulingManager& S,
-                                        MachineOpCode opCode);
-
-static void    ChooseInstructionsForDelaySlots(SchedulingManager& S,
-                                        const BasicBlock* bb,
-                                        SchedGraph* graph);
-
-static bool    NodeCanFillDelaySlot    (const SchedulingManager& S,
-                                        const SchedGraphNode* node,
-                                        const SchedGraphNode* brNode,
-                                        bool nodeIsPredecessor);
-
-static void    MarkNodeForDelaySlot    (SchedulingManager& S,
-                                        SchedGraph* graph,
-                                        SchedGraphNode* node,
-                                        const SchedGraphNode* brNode,
-                                        bool nodeIsPredecessor);
-
-//************************* Internal Data Types *****************************/
-
 
 //----------------------------------------------------------------------
 // class InstrGroup:
@@ -369,7 +336,7 @@ public:
     delayedNodeSlotNum = slotNum;
   }
   
-  void         scheduleDelayedNode     (SchedulingManager& S);
+  unsigned     scheduleDelayedNode     (SchedulingManager& S);
 };
 
 
@@ -539,7 +506,7 @@ public:
        if (createIfMissing)
          {
            dinfo = new DelaySlotInfo(bn,
-                         getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode()));
+                                      getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode()));
            delaySlotInfoForBranches[bn] = dinfo;
          }
        else
@@ -608,159 +575,63 @@ SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node,
       }
 }
 
-//************************* External Functions *****************************/
-
+//************************* Internal Functions *****************************/
 
-//---------------------------------------------------------------------------
-// Function: ScheduleInstructionsWithSSA
-// 
-// Purpose:
-//   Entry point for instruction scheduling on SSA form.
-//   Schedules the machine instructions generated by instruction selection.
-//   Assumes that register allocation has not been done, i.e., operands
-//   are still in SSA form.
-//---------------------------------------------------------------------------
 
-bool
-ScheduleInstructionsWithSSA(Method* method,
-                           const TargetMachine &target)
+static void
+AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
 {
-  SchedGraphSet graphSet(method, target);      
-  
-  if (SchedDebugLevel >= Sched_PrintSchedGraphs)
-    {
-      cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING"
-          << endl;
-      graphSet.dump();
-    }
-  
-  for (SchedGraphSet::const_iterator GI=graphSet.begin();
-       GI != graphSet.end(); ++GI)
-    {
-      SchedGraph* graph = (*GI).second;
-      const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks();
-      assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
-      const BasicBlock* bb = bbvec[0];
-      
-      if (SchedDebugLevel >= Sched_PrintSchedTrace)
-       cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
-      
-      SchedPriorities schedPrio(method, graph);             // expensive!
-      SchedulingManager S(target, graph, schedPrio);
-      
-      ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
-      
-      ForwardListSchedule(S);                       // computes schedule in S
-      
-      RecordSchedule((*GI).first, S);               // records schedule in BB
-    }
+  // find the slot to start from, in the current cycle
+  unsigned int startSlot = 0;
+  cycles_t curTime = S.getTime();
   
-  if (SchedDebugLevel >= Sched_PrintMachineCode)
-    {
-      cout << endl
-          << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl;
-      PrintMachineInstructions(method);
-    }
+  assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
   
-  return false;                                         // no reason to fail yet
-}
-
-
-// Check minimum gap requirements relative to instructions scheduled in
-// previous cycles.
-// Note that we do not need to consider `nextEarliestIssueTime' here because
-// that is also captured in the earliest start times for each opcode.
-// 
-static inline bool
-ViolatesMinimumGap(const SchedulingManager& S,
-                  MachineOpCode opCode,
-                  const cycles_t inCycle)
-{
-  return (inCycle < S.getEarliestStartTimeForOp(opCode));
-}
-
-
-// Check if the instruction would conflict with instructions already
-// chosen for the current cycle
-// 
-static inline bool
-ConflictsWithChoices(const SchedulingManager& S,
-                    MachineOpCode opCode)
-{
-  // Check if the instruction must issue by itself, and some feasible
-  // choices have already been made for this cycle
-  if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
-    return true;
+  // If only one instruction can be issued, do so.
+  if (maxIssue == 1)
+    for (unsigned s=startSlot; s < S.nslots; s++)
+      if (S.getChoicesForSlot(s).size() > 0)
+       {// found the one instruction
+         S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
+         return;
+       }
   
-  // For each class that opCode belongs to, check if there are too many
-  // instructions of that class.
+  // Otherwise, choose from the choices for each slot
   // 
-  const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
-  return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
-}
-
-
-// Check if any issue restrictions would prevent the instruction from
-// being issued in the current cycle
-// 
-bool
-instrIsFeasible(const SchedulingManager& S,
-               MachineOpCode opCode)
-{
-  // skip the instruction if it cannot be issued due to issue restrictions
-  // caused by previously issued instructions
-  if (ViolatesMinimumGap(S, opCode, S.getTime()))
-    return false;
-  
-  // skip the instruction if it cannot be issued due to issue restrictions
-  // caused by previously chosen instructions for the current cycle
-  if (ConflictsWithChoices(S, opCode))
-    return false;
-  
-  return true;
-}
-
-
-//************************* Internal Functions *****************************/
-
-
-static void
-ForwardListSchedule(SchedulingManager& S)
-{
-  unsigned N;
-  const SchedGraphNode* node;
-  
-  S.schedPrio.initialize();
+  InstrGroup* igroup = S.isched.getIGroup(S.getTime());
+  assert(igroup != NULL && "Group creation failed?");
   
-  while ((N = S.schedPrio.getNumReady()) > 0)
+  // Find a slot that has only a single choice, and take it.
+  // If all slots have 0 or multiple choices, pick the first slot with
+  // choices and use its last instruction (just to avoid shifting the vector).
+  unsigned numIssued;
+  for (numIssued = 0; numIssued < maxIssue; numIssued++)
     {
-      // Choose one group of instructions for a cycle.  This will
-      // advance S.getTime() to the first cycle instructions can be issued.
-      // It may also schedule delay slot instructions in later cycles,
-      // but those are ignored here because they are outside the graph.
-      // 
-      unsigned numIssued = ChooseOneGroup(S);
-      assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
-      
-      // Notify the priority manager of scheduled instructions and mark
-      // any successors that may now be ready
-      // 
-      const InstrGroup* igroup = S.isched.getIGroup(S.getTime());
-      for (unsigned int s=0; s < S.nslots; s++)
-       if ((node = (*igroup)[s]) != NULL)
+      int chosenSlot = -1, chosenNodeIndex = -1;
+      for (unsigned s=startSlot; s < S.nslots; s++)
+       if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
          {
-           S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
-           MarkSuccessorsReady(S, node);
+           chosenSlot = (int) s;
+           break;
          }
       
-      // Move to the next the next earliest cycle for which
-      // an instruction can be issued, or the next earliest in which
-      // one will be ready, or to the next cycle, whichever is latest.
-      // 
-      S.updateTime(max(S.getTime() + 1,
-                      max(S.getEarliestIssueTime(),
-                          S.schedPrio.getEarliestReadyTime())));
+      if (chosenSlot == -1)
+       for (unsigned s=startSlot; s < S.nslots; s++)
+         if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0)
+           {
+             chosenSlot = (int) s;
+             break;
+           }
+      
+      if (chosenSlot != -1)
+       { // Insert the chosen instr in the chosen slot and
+         // erase it from all slots.
+         const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
+         S.scheduleInstr(node, chosenSlot, curTime);
+       }
     }
+  
+  assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
 }
 
 
@@ -809,46 +680,6 @@ RecordSchedule(const BasicBlock* bb, const SchedulingManager& S)
 }
 
 
-static unsigned
-ChooseOneGroup(SchedulingManager& S)
-{
-  assert(S.schedPrio.getNumReady() > 0
-        && "Don't get here without ready instructions.");
-  
-  DelaySlotInfo* getDelaySlotInfo = NULL;
-  
-  // Choose up to `nslots' feasible instructions and their possible slots.
-  unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
-  
-  while (numIssued == 0)
-    {
-      S.updateTime(S.getTime()+1);
-      numIssued = FindSlotChoices(S, getDelaySlotInfo);
-    }
-  
-  AssignInstructionsToSlots(S, numIssued);
-  
-  if (getDelaySlotInfo != NULL)
-    getDelaySlotInfo->scheduleDelayedNode(S); 
-  
-  // Print trace of scheduled instructions before newly ready ones
-  if (SchedDebugLevel >= Sched_PrintSchedTrace)
-    {
-      cout << "    Cycle " << S.getTime() << " : Scheduled instructions:\n";
-      const InstrGroup* igroup = S.isched.getIGroup(S.getTime());
-      for (unsigned int s=0; s < S.nslots; s++)
-       {
-         cout << "        ";
-         if ((*igroup)[s] != NULL)
-           cout << * ((*igroup)[s])->getMachineInstr() << endl;
-         else
-           cout << "<none>" << endl;
-       }
-    }
-  
-  return numIssued;
-}
-
 
 static void
 MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node)
@@ -1025,7 +856,7 @@ FindSlotChoices(SchedulingManager& S,
        {
          // Try to assign every other instruction to a lower numbered
          // slot than delayedNodeSlot.
-         MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
+         MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
          bool noSlotFound = true;
          unsigned int s;
          for (s=startSlot; s < delayedNodeSlot; s++)
@@ -1093,7 +924,7 @@ FindSlotChoices(SchedulingManager& S,
       for (unsigned i=0;
           i < S.getNumChoices() && i < indexForBreakingNode; i++)
        {
-         MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
+         MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
          
          // If a higher priority instruction cannot be assigned to
          // any earlier slots, don't schedule the breaking instruction.
@@ -1144,63 +975,95 @@ FindSlotChoices(SchedulingManager& S,
 }
 
 
-static void
-AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
+static unsigned
+ChooseOneGroup(SchedulingManager& S)
 {
-  // find the slot to start from, in the current cycle
-  unsigned int startSlot = 0;
-  cycles_t curTime = S.getTime();
-  
-  assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
+  assert(S.schedPrio.getNumReady() > 0
+        && "Don't get here without ready instructions.");
   
-  // If only one instruction can be issued, do so.
-  if (maxIssue == 1)
-    for (unsigned s=startSlot; s < S.nslots; s++)
-      if (S.getChoicesForSlot(s).size() > 0)
-       {// found the one instruction
-         S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
-         return;
-       }
+  cycles_t firstCycle = S.getTime();
+  DelaySlotInfo* getDelaySlotInfo = NULL;
   
-  // Otherwise, choose from the choices for each slot
-  // 
-  InstrGroup* igroup = S.isched.getIGroup(S.getTime());
-  assert(igroup != NULL && "Group creation failed?");
+  // Choose up to `nslots' feasible instructions and their possible slots.
+  unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
   
-  // Find a slot that has only a single choice, and take it.
-  // If all slots have 0 or multiple choices, pick the first slot with
-  // choices and use its last instruction (just to avoid shifting the vector).
-  unsigned numIssued;
-  for (numIssued = 0; numIssued < maxIssue; numIssued++)
+  while (numIssued == 0)
     {
-      int chosenSlot = -1, chosenNodeIndex = -1;
-      for (unsigned s=startSlot; s < S.nslots; s++)
-       if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
-         {
-           chosenSlot = (int) s;
-           break;
-         }
-      
-      if (chosenSlot == -1)
-       for (unsigned s=startSlot; s < S.nslots; s++)
-         if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0)
-           {
-             chosenSlot = (int) s;
-             break;
-           }
-      
-      if (chosenSlot != -1)
-       { // Insert the chosen instr in the chosen slot and
-         // erase it from all slots.
-         const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
-         S.scheduleInstr(node, chosenSlot, curTime);
-       }
+      S.updateTime(S.getTime()+1);
+      numIssued = FindSlotChoices(S, getDelaySlotInfo);
     }
   
-  assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
+  AssignInstructionsToSlots(S, numIssued);
+  
+  if (getDelaySlotInfo != NULL)
+    numIssued += getDelaySlotInfo->scheduleDelayedNode(S); 
+  
+  // Print trace of scheduled instructions before newly ready ones
+  if (SchedDebugLevel >= Sched_PrintSchedTrace)
+    {
+      for (cycles_t c = firstCycle; c <= S.getTime(); c++)
+        {
+          cout << "    Cycle " << c << " : Scheduled instructions:\n";
+          const InstrGroup* igroup = S.isched.getIGroup(c);
+          for (unsigned int s=0; s < S.nslots; s++)
+            {
+              cout << "        ";
+              if ((*igroup)[s] != NULL)
+                cout << * ((*igroup)[s])->getMachineInstr() << endl;
+              else
+                cout << "<none>" << endl;
+            }
+        }
+    }
+  
+  return numIssued;
 }
 
 
+static void
+ForwardListSchedule(SchedulingManager& S)
+{
+  unsigned N;
+  const SchedGraphNode* node;
+  
+  S.schedPrio.initialize();
+  
+  while ((N = S.schedPrio.getNumReady()) > 0)
+    {
+      cycles_t nextCycle = S.getTime();
+      
+      // Choose one group of instructions for a cycle, plus any delay slot
+      // instructions (which may overflow into successive cycles).
+      // This will advance S.getTime() to the last cycle in which
+      // instructions are actually issued.
+      // 
+      unsigned numIssued = ChooseOneGroup(S);
+      assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
+      
+      // Notify the priority manager of scheduled instructions and mark
+      // any successors that may now be ready
+      // 
+      for (cycles_t c = nextCycle; c <= S.getTime(); c++)
+        {
+          const InstrGroup* igroup = S.isched.getIGroup(c);
+          for (unsigned int s=0; s < S.nslots; s++)
+            if ((node = (*igroup)[s]) != NULL)
+              {
+                S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
+                MarkSuccessorsReady(S, node);
+              }
+        }
+      
+      // Move to the next the next earliest cycle for which
+      // an instruction can be issued, or the next earliest in which
+      // one will be ready, or to the next cycle, whichever is latest.
+      // 
+      S.updateTime(max(S.getTime() + 1,
+                      max(S.getEarliestIssueTime(),
+                          S.schedPrio.getEarliestReadyTime())));
+    }
+}
+
 
 //---------------------------------------------------------------------
 // Code for filling delay slots for delayed terminator instructions
@@ -1211,107 +1074,7 @@ AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
 // when we cannot find single-cycle instructions that can be reordered.
 //----------------------------------------------------------------------
 
-static void
-ChooseInstructionsForDelaySlots(SchedulingManager& S,
-                               const BasicBlock* bb,
-                               SchedGraph* graph)
-{
-  // Look for instructions that can be used for delay slots.
-  // Remove them from the graph, and mark them to be used for delay slots.
-  const MachineInstrInfo& mii = S.getInstrInfo();
-  const TerminatorInst* term = bb->getTerminator();
-  MachineCodeForVMInstr& termMvec = term->getMachineInstrVec();
-  
-  // Find the first branch instr in the sequence of machine instrs for term
-  // 
-  unsigned first = 0;
-  while (! mii.isBranch(termMvec[first]->getOpCode()))
-    ++first;
-  assert(first < termMvec.size() &&
-        "No branch instructions for BR?  Ok, but weird!  Delete assertion.");
-  if (first == termMvec.size())
-    return;
-  
-  SchedGraphNode* brNode = graph->getGraphNodeForInstr(termMvec[first]);
-  assert(! mii.isCall(brNode->getMachineInstr()->getOpCode()) && "Call used as terminator?");
-  
-  unsigned ndelays = mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode());
-  if (ndelays == 0)
-    return;
-  
-  // Use vectors to remember the nodes chosen for delay slots, and the
-  // NOPs that will be unused.  We cannot remove them from the graph while
-  // walking through the preds and succs of the brNode here, so we
-  // remember the nodes in the vectors and remove them later.
-  // We use separate vectors for the single-cycle and multi-cycle nodes,
-  // so that we can give preference to single-cycle nodes.
-  // 
-  vector<SchedGraphNode*> sdelayNodeVec;
-  vector<SchedGraphNode*> mdelayNodeVec;
-  vector<SchedGraphNode*> nopNodeVec;
-  unsigned numUseful = 0;
-  
-  sdelayNodeVec.reserve(ndelays);
-  
-  for (sg_pred_iterator P = pred_begin(brNode);
-       P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
-    if (! (*P)->isDummyNode() &&
-       ! mii.isNop((*P)->getMachineInstr()->getOpCode()) &&
-       NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
-      {
-       ++numUseful;
-       if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1)
-         mdelayNodeVec.push_back(*P);
-       else
-         sdelayNodeVec.push_back(*P);
-      }
-  
-  // If not enough single-cycle instructions were found, select the
-  // lowest-latency multi-cycle instructions and use them.
-  // Note that this is the most efficient code when only 1 (or even 2)
-  // values need to be selected.
-  // 
-  while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
-    {
-      unsigned latency;
-      unsigned minLatency = mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode());
-      unsigned minIndex   = 0;
-      for (unsigned i=1; i < mdelayNodeVec.size(); i++)
-       if (minLatency >=
-           (latency = mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode())))
-         {
-           minLatency = latency;
-           minIndex   = i;
-         }
-      sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
-      if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
-       mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
-    }
-  
-  // Now, remove the NOPs currently in delay slots from the graph.
-  // If not enough useful instructions were found, use the NOPs to
-  // fill delay slots, otherwise, just discard them.
-  for (sg_succ_iterator I=succ_begin(brNode); I != succ_end(brNode); ++I)
-    if (! (*I)->isDummyNode()
-       && mii.isNop((*I)->getMachineInstr()->getOpCode()))
-      {
-       if (sdelayNodeVec.size() < ndelays)
-         sdelayNodeVec.push_back(*I);
-       else
-         nopNodeVec.push_back(*I);
-      }
-  
-  // Mark the nodes chosen for delay slots.  This removes them from the graph.
-  for (unsigned i=0; i < sdelayNodeVec.size(); i++)
-    MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], brNode, true);
-  
-  // And remove the unused NOPs from the graph.
-  for (unsigned i=0; i < nopNodeVec.size(); i++)
-    graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
-}
-
-
-bool
+static bool
 NodeCanFillDelaySlot(const SchedulingManager& S,
                     const SchedGraphNode* node,
                     const SchedGraphNode* brNode,
@@ -1367,7 +1130,7 @@ NodeCanFillDelaySlot(const SchedulingManager& S,
 }
 
 
-void
+static void
 MarkNodeForDelaySlot(SchedulingManager& S,
                     SchedGraph* graph,
                     SchedGraphNode* node,
@@ -1391,10 +1154,173 @@ MarkNodeForDelaySlot(SchedulingManager& S,
 }
 
 
+void
+FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
+                                    SchedGraphNode* brNode,
+                                    vector<SchedGraphNode*>& sdelayNodeVec)
+{
+  const MachineInstrInfo& mii = S.getInstrInfo();
+  unsigned ndelays =
+    mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode());
+  
+  if (ndelays == 0)
+    return;
+  
+  sdelayNodeVec.reserve(ndelays);
+  
+  // Use a separate vector to hold the feasible multi-cycle nodes.
+  // These will be used if not enough single-cycle nodes are found.
+  // 
+  vector<SchedGraphNode*> mdelayNodeVec;
+  
+  for (sg_pred_iterator P = pred_begin(brNode);
+       P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
+    if (! (*P)->isDummyNode() &&
+       ! mii.isNop((*P)->getMachineInstr()->getOpCode()) &&
+       NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
+      {
+       if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1)
+         mdelayNodeVec.push_back(*P);
+       else
+         sdelayNodeVec.push_back(*P);
+      }
+  
+  // If not enough single-cycle instructions were found, select the
+  // lowest-latency multi-cycle instructions and use them.
+  // Note that this is the most efficient code when only 1 (or even 2)
+  // values need to be selected.
+  // 
+  while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
+    {
+      unsigned lmin =
+        mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode());
+      unsigned minIndex   = 0;
+      for (unsigned i=1; i < mdelayNodeVec.size(); i++)
+        {
+          unsigned li = 
+            mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode());
+          if (lmin >= li)
+            {
+              lmin = li;
+              minIndex = i;
+            }
+        }
+      sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
+      if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
+       mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
+    }
+}
+
+
+// Remove the NOPs currently in delay slots from the graph.
+// Mark instructions specified in sdelayNodeVec to replace them.
+// If not enough useful instructions were found, mark the NOPs to be used
+// for filling delay slots, otherwise, otherwise just discard them.
+// 
+void
+ReplaceNopsWithUsefulInstr(SchedulingManager& S,
+                           SchedGraphNode* node,
+                           vector<SchedGraphNode*> sdelayNodeVec,
+                           SchedGraph* graph)
+{
+  vector<SchedGraphNode*> nopNodeVec;
+  const MachineInstrInfo& mii = S.getInstrInfo();
+  unsigned ndelays= mii.getNumDelaySlots(node->getMachineInstr()->getOpCode());
+  assert(ndelays > 0 && "Unnecessary call to replace NOPs");
+  
+  // Remove the NOPs currently in delay slots from the graph.
+  // If not enough useful instructions were found, use the NOPs to
+  // fill delay slots, otherwise, just discard them.
+  for (sg_succ_iterator I=succ_begin(node); I != succ_end(node); ++I)
+    if (! (*I)->isDummyNode()
+       && mii.isNop((*I)->getMachineInstr()->getOpCode()))
+      {
+       if (sdelayNodeVec.size() < ndelays)
+         sdelayNodeVec.push_back(*I);
+       else
+         nopNodeVec.push_back(*I);
+      }
+  assert(sdelayNodeVec.size() == ndelays);
+  
+  // Mark the nodes chosen for delay slots.  This removes them from the graph.
+  for (unsigned i=0; i < sdelayNodeVec.size(); i++)
+    MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true);
+  
+  // And remove the unused NOPs from the graph.
+  for (unsigned i=0; i < nopNodeVec.size(); i++)
+    graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
+}
+
+
+// For all delayed instructions, choose instructions to put in the delay
+// slots and pull those out of the graph.  Mark them for the delay slots
+// in the DelaySlotInfo object for that graph node.  If no useful work
+// is found for a delay slot, use the NOP that is currently in that slot.
+// 
+// We try to fill the delay slots with useful work for all instructions
+// except CALLs.  For CALLs, it is nearly always possible to use one of the
+// call sequence instrs and putting anything else in the delay slot could be
+// suboptimal.
+// 
+static void
+ChooseInstructionsForDelaySlots(SchedulingManager& S,
+                               const BasicBlock* bb,
+                               SchedGraph* graph)
+{
+  const MachineInstrInfo& mii = S.getInstrInfo();
+  const TerminatorInst* termInstr = bb->getTerminator();
+  MachineCodeForVMInstr& termMvec = termInstr->getMachineInstrVec();
+  vector<SchedGraphNode*> delayNodeVec;
+  const MachineInstr* brInstr;
+  
+  assert(termInstr->getOpcode() != Instruction::Call
+         && "Call used as terminator?");
+  
+  // To find instructions that need delay slots without searching the entire
+  // machine code, we assume the only delayed instructions are CALLs or
+  // instructions generated for the terminator inst.
+  // Find the first branch instr in the sequence of machine instrs for term
+  // 
+  unsigned first = 0;
+  while (first < termMvec.size() &&
+         ! mii.isBranch(termMvec[first]->getOpCode()))
+    {
+      ++first;
+    }
+  assert(first < termMvec.size() &&
+        "No branch instructions for BR?  Ok, but weird!  Delete assertion.");
+  
+  brInstr = (first < termMvec.size())? termMvec[first] : NULL;
+  
+  // Compute a vector of the nodes chosen for delay slots and then
+  // mark delay slots to replace NOPs with these useful instructions.
+  // 
+  if (brInstr != NULL)
+    {
+      SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr);
+      FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec);
+      ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph);
+    }
+  
+  // Also mark delay slots for other delayed instructions to hold NOPs. 
+  // Simply passing in an empty delayNodeVec will have this effect.
+  // 
+  delayNodeVec.clear();
+  const MachineCodeForBasicBlock& bbMvec = bb->getMachineInstrVec();
+  for (unsigned i=0; i < bbMvec.size(); i++)
+    if (bbMvec[i] != brInstr &&
+        mii.getNumDelaySlots(bbMvec[i]->getOpCode()) > 0)
+      {
+        SchedGraphNode* node = graph->getGraphNodeForInstr(bbMvec[i]);
+        ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph);
+      }
+}
+
+
 // 
 // Schedule the delayed branch and its delay slots
 // 
-void
+unsigned
 DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
 {
   assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch");
@@ -1461,5 +1387,130 @@ DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
        S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime);
        break;
       }
+
+  return 1 + ndelays;
+}
+
+
+// Check if the instruction would conflict with instructions already
+// chosen for the current cycle
+// 
+static inline bool
+ConflictsWithChoices(const SchedulingManager& S,
+                    MachineOpCode opCode)
+{
+  // Check if the instruction must issue by itself, and some feasible
+  // choices have already been made for this cycle
+  if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
+    return true;
+  
+  // For each class that opCode belongs to, check if there are too many
+  // instructions of that class.
+  // 
+  const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
+  return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
 }
 
+
+//************************* External Functions *****************************/
+
+
+//---------------------------------------------------------------------------
+// Function: ViolatesMinimumGap
+// 
+// Purpose:
+//   Check minimum gap requirements relative to instructions scheduled in
+//   previous cycles.
+//   Note that we do not need to consider `nextEarliestIssueTime' here because
+//   that is also captured in the earliest start times for each opcode.
+//---------------------------------------------------------------------------
+
+static inline bool
+ViolatesMinimumGap(const SchedulingManager& S,
+                  MachineOpCode opCode,
+                  const cycles_t inCycle)
+{
+  return (inCycle < S.getEarliestStartTimeForOp(opCode));
+}
+
+
+//---------------------------------------------------------------------------
+// Function: instrIsFeasible
+// 
+// Purpose:
+//   Check if any issue restrictions would prevent the instruction from
+//   being issued in the current cycle
+//---------------------------------------------------------------------------
+
+bool
+instrIsFeasible(const SchedulingManager& S,
+               MachineOpCode opCode)
+{
+  // skip the instruction if it cannot be issued due to issue restrictions
+  // caused by previously issued instructions
+  if (ViolatesMinimumGap(S, opCode, S.getTime()))
+    return false;
+  
+  // skip the instruction if it cannot be issued due to issue restrictions
+  // caused by previously chosen instructions for the current cycle
+  if (ConflictsWithChoices(S, opCode))
+    return false;
+  
+  return true;
+}
+
+//---------------------------------------------------------------------------
+// Function: ScheduleInstructionsWithSSA
+// 
+// Purpose:
+//   Entry point for instruction scheduling on SSA form.
+//   Schedules the machine instructions generated by instruction selection.
+//   Assumes that register allocation has not been done, i.e., operands
+//   are still in SSA form.
+//---------------------------------------------------------------------------
+
+bool
+ScheduleInstructionsWithSSA(Method* method,
+                           const TargetMachine &target)
+{
+  SchedGraphSet graphSet(method, target);      
+  
+  if (SchedDebugLevel >= Sched_PrintSchedGraphs)
+    {
+      cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING"
+          << endl;
+      graphSet.dump();
+    }
+  
+  for (SchedGraphSet::const_iterator GI=graphSet.begin();
+       GI != graphSet.end(); ++GI)
+    {
+      SchedGraph* graph = (*GI).second;
+      const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks();
+      assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
+      const BasicBlock* bb = bbvec[0];
+      
+      if (SchedDebugLevel >= Sched_PrintSchedTrace)
+       cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
+      
+      SchedPriorities schedPrio(method, graph);             // expensive!
+      SchedulingManager S(target, graph, schedPrio);
+      
+      ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
+      
+      ForwardListSchedule(S);                       // computes schedule in S
+      
+      RecordSchedule((*GI).first, S);               // records schedule in BB
+    }
+  
+  if (SchedDebugLevel >= Sched_PrintMachineCode)
+    {
+      cout << endl
+          << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl;
+      PrintMachineInstructions(method);
+    }
+  
+  return false;                                         // no reason to fail yet
+}
+
+
index 34802c9e5bb7ab89575d44596a4a65d25bd086bf..0b194207ca46aaf8ac2016ad60b52eed4a8844f3 100644 (file)
@@ -9,16 +9,26 @@
 //     7/23/01  -  Vikram Adve  -  Created
 //**************************************************************************/
 
+
+//************************* User Include Files *****************************/
+
 #include "llvm/CodeGen/InstrScheduling.h"
 #include "SchedPriorities.h"
 #include "llvm/Analysis/LiveVar/BBLiveVar.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Instruction.h"
+
+
+//************************ System Include Files *****************************/
+
 #include <hash_set>
 #include <algorithm>
 #include <iterator>
 
+
+//************************* External Data Types *****************************/
+
 cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags,
   "enable instruction scheduling debugging information",
   clEnumValN(Sched_NoDebugInfo,      "n", "disable debug output"),
@@ -27,55 +37,12 @@ cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags,
   clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 0);
 
 
+//************************* Internal Data Types *****************************/
+
 class InstrSchedule;
 class SchedulingManager;
 class DelaySlotInfo;
 
-static void    ForwardListSchedule     (SchedulingManager& S);
-
-static void    RecordSchedule          (const BasicBlock* bb,
-                                        const SchedulingManager& S);
-
-static unsigned        ChooseOneGroup          (SchedulingManager& S);
-
-static void    MarkSuccessorsReady     (SchedulingManager& S,
-                                        const SchedGraphNode* node);
-
-static unsigned        FindSlotChoices         (SchedulingManager& S,
-                                        DelaySlotInfo*& getDelaySlotInfo);
-
-static void    AssignInstructionsToSlots(class SchedulingManager& S,
-                                         unsigned maxIssue);
-
-static void    ScheduleInstr           (class SchedulingManager& S,
-                                        const SchedGraphNode* node,
-                                        unsigned int slotNum,
-                                        cycles_t curTime);
-
-static bool    ViolatesMinimumGap      (const SchedulingManager& S,
-                                        MachineOpCode opCode,
-                                        const cycles_t inCycle);
-
-static bool    ConflictsWithChoices    (const SchedulingManager& S,
-                                        MachineOpCode opCode);
-
-static void    ChooseInstructionsForDelaySlots(SchedulingManager& S,
-                                        const BasicBlock* bb,
-                                        SchedGraph* graph);
-
-static bool    NodeCanFillDelaySlot    (const SchedulingManager& S,
-                                        const SchedGraphNode* node,
-                                        const SchedGraphNode* brNode,
-                                        bool nodeIsPredecessor);
-
-static void    MarkNodeForDelaySlot    (SchedulingManager& S,
-                                        SchedGraph* graph,
-                                        SchedGraphNode* node,
-                                        const SchedGraphNode* brNode,
-                                        bool nodeIsPredecessor);
-
-//************************* Internal Data Types *****************************/
-
 
 //----------------------------------------------------------------------
 // class InstrGroup:
@@ -369,7 +336,7 @@ public:
     delayedNodeSlotNum = slotNum;
   }
   
-  void         scheduleDelayedNode     (SchedulingManager& S);
+  unsigned     scheduleDelayedNode     (SchedulingManager& S);
 };
 
 
@@ -539,7 +506,7 @@ public:
        if (createIfMissing)
          {
            dinfo = new DelaySlotInfo(bn,
-                         getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode()));
+                                      getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode()));
            delaySlotInfoForBranches[bn] = dinfo;
          }
        else
@@ -608,159 +575,63 @@ SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node,
       }
 }
 
-//************************* External Functions *****************************/
-
+//************************* Internal Functions *****************************/
 
-//---------------------------------------------------------------------------
-// Function: ScheduleInstructionsWithSSA
-// 
-// Purpose:
-//   Entry point for instruction scheduling on SSA form.
-//   Schedules the machine instructions generated by instruction selection.
-//   Assumes that register allocation has not been done, i.e., operands
-//   are still in SSA form.
-//---------------------------------------------------------------------------
 
-bool
-ScheduleInstructionsWithSSA(Method* method,
-                           const TargetMachine &target)
+static void
+AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
 {
-  SchedGraphSet graphSet(method, target);      
-  
-  if (SchedDebugLevel >= Sched_PrintSchedGraphs)
-    {
-      cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING"
-          << endl;
-      graphSet.dump();
-    }
-  
-  for (SchedGraphSet::const_iterator GI=graphSet.begin();
-       GI != graphSet.end(); ++GI)
-    {
-      SchedGraph* graph = (*GI).second;
-      const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks();
-      assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
-      const BasicBlock* bb = bbvec[0];
-      
-      if (SchedDebugLevel >= Sched_PrintSchedTrace)
-       cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
-      
-      SchedPriorities schedPrio(method, graph);             // expensive!
-      SchedulingManager S(target, graph, schedPrio);
-      
-      ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
-      
-      ForwardListSchedule(S);                       // computes schedule in S
-      
-      RecordSchedule((*GI).first, S);               // records schedule in BB
-    }
+  // find the slot to start from, in the current cycle
+  unsigned int startSlot = 0;
+  cycles_t curTime = S.getTime();
   
-  if (SchedDebugLevel >= Sched_PrintMachineCode)
-    {
-      cout << endl
-          << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl;
-      PrintMachineInstructions(method);
-    }
+  assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
   
-  return false;                                         // no reason to fail yet
-}
-
-
-// Check minimum gap requirements relative to instructions scheduled in
-// previous cycles.
-// Note that we do not need to consider `nextEarliestIssueTime' here because
-// that is also captured in the earliest start times for each opcode.
-// 
-static inline bool
-ViolatesMinimumGap(const SchedulingManager& S,
-                  MachineOpCode opCode,
-                  const cycles_t inCycle)
-{
-  return (inCycle < S.getEarliestStartTimeForOp(opCode));
-}
-
-
-// Check if the instruction would conflict with instructions already
-// chosen for the current cycle
-// 
-static inline bool
-ConflictsWithChoices(const SchedulingManager& S,
-                    MachineOpCode opCode)
-{
-  // Check if the instruction must issue by itself, and some feasible
-  // choices have already been made for this cycle
-  if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
-    return true;
+  // If only one instruction can be issued, do so.
+  if (maxIssue == 1)
+    for (unsigned s=startSlot; s < S.nslots; s++)
+      if (S.getChoicesForSlot(s).size() > 0)
+       {// found the one instruction
+         S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
+         return;
+       }
   
-  // For each class that opCode belongs to, check if there are too many
-  // instructions of that class.
+  // Otherwise, choose from the choices for each slot
   // 
-  const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
-  return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
-}
-
-
-// Check if any issue restrictions would prevent the instruction from
-// being issued in the current cycle
-// 
-bool
-instrIsFeasible(const SchedulingManager& S,
-               MachineOpCode opCode)
-{
-  // skip the instruction if it cannot be issued due to issue restrictions
-  // caused by previously issued instructions
-  if (ViolatesMinimumGap(S, opCode, S.getTime()))
-    return false;
-  
-  // skip the instruction if it cannot be issued due to issue restrictions
-  // caused by previously chosen instructions for the current cycle
-  if (ConflictsWithChoices(S, opCode))
-    return false;
-  
-  return true;
-}
-
-
-//************************* Internal Functions *****************************/
-
-
-static void
-ForwardListSchedule(SchedulingManager& S)
-{
-  unsigned N;
-  const SchedGraphNode* node;
-  
-  S.schedPrio.initialize();
+  InstrGroup* igroup = S.isched.getIGroup(S.getTime());
+  assert(igroup != NULL && "Group creation failed?");
   
-  while ((N = S.schedPrio.getNumReady()) > 0)
+  // Find a slot that has only a single choice, and take it.
+  // If all slots have 0 or multiple choices, pick the first slot with
+  // choices and use its last instruction (just to avoid shifting the vector).
+  unsigned numIssued;
+  for (numIssued = 0; numIssued < maxIssue; numIssued++)
     {
-      // Choose one group of instructions for a cycle.  This will
-      // advance S.getTime() to the first cycle instructions can be issued.
-      // It may also schedule delay slot instructions in later cycles,
-      // but those are ignored here because they are outside the graph.
-      // 
-      unsigned numIssued = ChooseOneGroup(S);
-      assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
-      
-      // Notify the priority manager of scheduled instructions and mark
-      // any successors that may now be ready
-      // 
-      const InstrGroup* igroup = S.isched.getIGroup(S.getTime());
-      for (unsigned int s=0; s < S.nslots; s++)
-       if ((node = (*igroup)[s]) != NULL)
+      int chosenSlot = -1, chosenNodeIndex = -1;
+      for (unsigned s=startSlot; s < S.nslots; s++)
+       if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
          {
-           S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
-           MarkSuccessorsReady(S, node);
+           chosenSlot = (int) s;
+           break;
          }
       
-      // Move to the next the next earliest cycle for which
-      // an instruction can be issued, or the next earliest in which
-      // one will be ready, or to the next cycle, whichever is latest.
-      // 
-      S.updateTime(max(S.getTime() + 1,
-                      max(S.getEarliestIssueTime(),
-                          S.schedPrio.getEarliestReadyTime())));
+      if (chosenSlot == -1)
+       for (unsigned s=startSlot; s < S.nslots; s++)
+         if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0)
+           {
+             chosenSlot = (int) s;
+             break;
+           }
+      
+      if (chosenSlot != -1)
+       { // Insert the chosen instr in the chosen slot and
+         // erase it from all slots.
+         const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
+         S.scheduleInstr(node, chosenSlot, curTime);
+       }
     }
+  
+  assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
 }
 
 
@@ -809,46 +680,6 @@ RecordSchedule(const BasicBlock* bb, const SchedulingManager& S)
 }
 
 
-static unsigned
-ChooseOneGroup(SchedulingManager& S)
-{
-  assert(S.schedPrio.getNumReady() > 0
-        && "Don't get here without ready instructions.");
-  
-  DelaySlotInfo* getDelaySlotInfo = NULL;
-  
-  // Choose up to `nslots' feasible instructions and their possible slots.
-  unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
-  
-  while (numIssued == 0)
-    {
-      S.updateTime(S.getTime()+1);
-      numIssued = FindSlotChoices(S, getDelaySlotInfo);
-    }
-  
-  AssignInstructionsToSlots(S, numIssued);
-  
-  if (getDelaySlotInfo != NULL)
-    getDelaySlotInfo->scheduleDelayedNode(S); 
-  
-  // Print trace of scheduled instructions before newly ready ones
-  if (SchedDebugLevel >= Sched_PrintSchedTrace)
-    {
-      cout << "    Cycle " << S.getTime() << " : Scheduled instructions:\n";
-      const InstrGroup* igroup = S.isched.getIGroup(S.getTime());
-      for (unsigned int s=0; s < S.nslots; s++)
-       {
-         cout << "        ";
-         if ((*igroup)[s] != NULL)
-           cout << * ((*igroup)[s])->getMachineInstr() << endl;
-         else
-           cout << "<none>" << endl;
-       }
-    }
-  
-  return numIssued;
-}
-
 
 static void
 MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node)
@@ -1025,7 +856,7 @@ FindSlotChoices(SchedulingManager& S,
        {
          // Try to assign every other instruction to a lower numbered
          // slot than delayedNodeSlot.
-         MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
+         MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
          bool noSlotFound = true;
          unsigned int s;
          for (s=startSlot; s < delayedNodeSlot; s++)
@@ -1093,7 +924,7 @@ FindSlotChoices(SchedulingManager& S,
       for (unsigned i=0;
           i < S.getNumChoices() && i < indexForBreakingNode; i++)
        {
-         MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
+         MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
          
          // If a higher priority instruction cannot be assigned to
          // any earlier slots, don't schedule the breaking instruction.
@@ -1144,63 +975,95 @@ FindSlotChoices(SchedulingManager& S,
 }
 
 
-static void
-AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
+static unsigned
+ChooseOneGroup(SchedulingManager& S)
 {
-  // find the slot to start from, in the current cycle
-  unsigned int startSlot = 0;
-  cycles_t curTime = S.getTime();
-  
-  assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
+  assert(S.schedPrio.getNumReady() > 0
+        && "Don't get here without ready instructions.");
   
-  // If only one instruction can be issued, do so.
-  if (maxIssue == 1)
-    for (unsigned s=startSlot; s < S.nslots; s++)
-      if (S.getChoicesForSlot(s).size() > 0)
-       {// found the one instruction
-         S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
-         return;
-       }
+  cycles_t firstCycle = S.getTime();
+  DelaySlotInfo* getDelaySlotInfo = NULL;
   
-  // Otherwise, choose from the choices for each slot
-  // 
-  InstrGroup* igroup = S.isched.getIGroup(S.getTime());
-  assert(igroup != NULL && "Group creation failed?");
+  // Choose up to `nslots' feasible instructions and their possible slots.
+  unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
   
-  // Find a slot that has only a single choice, and take it.
-  // If all slots have 0 or multiple choices, pick the first slot with
-  // choices and use its last instruction (just to avoid shifting the vector).
-  unsigned numIssued;
-  for (numIssued = 0; numIssued < maxIssue; numIssued++)
+  while (numIssued == 0)
     {
-      int chosenSlot = -1, chosenNodeIndex = -1;
-      for (unsigned s=startSlot; s < S.nslots; s++)
-       if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
-         {
-           chosenSlot = (int) s;
-           break;
-         }
-      
-      if (chosenSlot == -1)
-       for (unsigned s=startSlot; s < S.nslots; s++)
-         if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0)
-           {
-             chosenSlot = (int) s;
-             break;
-           }
-      
-      if (chosenSlot != -1)
-       { // Insert the chosen instr in the chosen slot and
-         // erase it from all slots.
-         const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
-         S.scheduleInstr(node, chosenSlot, curTime);
-       }
+      S.updateTime(S.getTime()+1);
+      numIssued = FindSlotChoices(S, getDelaySlotInfo);
     }
   
-  assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
+  AssignInstructionsToSlots(S, numIssued);
+  
+  if (getDelaySlotInfo != NULL)
+    numIssued += getDelaySlotInfo->scheduleDelayedNode(S); 
+  
+  // Print trace of scheduled instructions before newly ready ones
+  if (SchedDebugLevel >= Sched_PrintSchedTrace)
+    {
+      for (cycles_t c = firstCycle; c <= S.getTime(); c++)
+        {
+          cout << "    Cycle " << c << " : Scheduled instructions:\n";
+          const InstrGroup* igroup = S.isched.getIGroup(c);
+          for (unsigned int s=0; s < S.nslots; s++)
+            {
+              cout << "        ";
+              if ((*igroup)[s] != NULL)
+                cout << * ((*igroup)[s])->getMachineInstr() << endl;
+              else
+                cout << "<none>" << endl;
+            }
+        }
+    }
+  
+  return numIssued;
 }
 
 
+static void
+ForwardListSchedule(SchedulingManager& S)
+{
+  unsigned N;
+  const SchedGraphNode* node;
+  
+  S.schedPrio.initialize();
+  
+  while ((N = S.schedPrio.getNumReady()) > 0)
+    {
+      cycles_t nextCycle = S.getTime();
+      
+      // Choose one group of instructions for a cycle, plus any delay slot
+      // instructions (which may overflow into successive cycles).
+      // This will advance S.getTime() to the last cycle in which
+      // instructions are actually issued.
+      // 
+      unsigned numIssued = ChooseOneGroup(S);
+      assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
+      
+      // Notify the priority manager of scheduled instructions and mark
+      // any successors that may now be ready
+      // 
+      for (cycles_t c = nextCycle; c <= S.getTime(); c++)
+        {
+          const InstrGroup* igroup = S.isched.getIGroup(c);
+          for (unsigned int s=0; s < S.nslots; s++)
+            if ((node = (*igroup)[s]) != NULL)
+              {
+                S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
+                MarkSuccessorsReady(S, node);
+              }
+        }
+      
+      // Move to the next the next earliest cycle for which
+      // an instruction can be issued, or the next earliest in which
+      // one will be ready, or to the next cycle, whichever is latest.
+      // 
+      S.updateTime(max(S.getTime() + 1,
+                      max(S.getEarliestIssueTime(),
+                          S.schedPrio.getEarliestReadyTime())));
+    }
+}
+
 
 //---------------------------------------------------------------------
 // Code for filling delay slots for delayed terminator instructions
@@ -1211,107 +1074,7 @@ AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
 // when we cannot find single-cycle instructions that can be reordered.
 //----------------------------------------------------------------------
 
-static void
-ChooseInstructionsForDelaySlots(SchedulingManager& S,
-                               const BasicBlock* bb,
-                               SchedGraph* graph)
-{
-  // Look for instructions that can be used for delay slots.
-  // Remove them from the graph, and mark them to be used for delay slots.
-  const MachineInstrInfo& mii = S.getInstrInfo();
-  const TerminatorInst* term = bb->getTerminator();
-  MachineCodeForVMInstr& termMvec = term->getMachineInstrVec();
-  
-  // Find the first branch instr in the sequence of machine instrs for term
-  // 
-  unsigned first = 0;
-  while (! mii.isBranch(termMvec[first]->getOpCode()))
-    ++first;
-  assert(first < termMvec.size() &&
-        "No branch instructions for BR?  Ok, but weird!  Delete assertion.");
-  if (first == termMvec.size())
-    return;
-  
-  SchedGraphNode* brNode = graph->getGraphNodeForInstr(termMvec[first]);
-  assert(! mii.isCall(brNode->getMachineInstr()->getOpCode()) && "Call used as terminator?");
-  
-  unsigned ndelays = mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode());
-  if (ndelays == 0)
-    return;
-  
-  // Use vectors to remember the nodes chosen for delay slots, and the
-  // NOPs that will be unused.  We cannot remove them from the graph while
-  // walking through the preds and succs of the brNode here, so we
-  // remember the nodes in the vectors and remove them later.
-  // We use separate vectors for the single-cycle and multi-cycle nodes,
-  // so that we can give preference to single-cycle nodes.
-  // 
-  vector<SchedGraphNode*> sdelayNodeVec;
-  vector<SchedGraphNode*> mdelayNodeVec;
-  vector<SchedGraphNode*> nopNodeVec;
-  unsigned numUseful = 0;
-  
-  sdelayNodeVec.reserve(ndelays);
-  
-  for (sg_pred_iterator P = pred_begin(brNode);
-       P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
-    if (! (*P)->isDummyNode() &&
-       ! mii.isNop((*P)->getMachineInstr()->getOpCode()) &&
-       NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
-      {
-       ++numUseful;
-       if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1)
-         mdelayNodeVec.push_back(*P);
-       else
-         sdelayNodeVec.push_back(*P);
-      }
-  
-  // If not enough single-cycle instructions were found, select the
-  // lowest-latency multi-cycle instructions and use them.
-  // Note that this is the most efficient code when only 1 (or even 2)
-  // values need to be selected.
-  // 
-  while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
-    {
-      unsigned latency;
-      unsigned minLatency = mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode());
-      unsigned minIndex   = 0;
-      for (unsigned i=1; i < mdelayNodeVec.size(); i++)
-       if (minLatency >=
-           (latency = mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode())))
-         {
-           minLatency = latency;
-           minIndex   = i;
-         }
-      sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
-      if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
-       mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
-    }
-  
-  // Now, remove the NOPs currently in delay slots from the graph.
-  // If not enough useful instructions were found, use the NOPs to
-  // fill delay slots, otherwise, just discard them.
-  for (sg_succ_iterator I=succ_begin(brNode); I != succ_end(brNode); ++I)
-    if (! (*I)->isDummyNode()
-       && mii.isNop((*I)->getMachineInstr()->getOpCode()))
-      {
-       if (sdelayNodeVec.size() < ndelays)
-         sdelayNodeVec.push_back(*I);
-       else
-         nopNodeVec.push_back(*I);
-      }
-  
-  // Mark the nodes chosen for delay slots.  This removes them from the graph.
-  for (unsigned i=0; i < sdelayNodeVec.size(); i++)
-    MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], brNode, true);
-  
-  // And remove the unused NOPs from the graph.
-  for (unsigned i=0; i < nopNodeVec.size(); i++)
-    graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
-}
-
-
-bool
+static bool
 NodeCanFillDelaySlot(const SchedulingManager& S,
                     const SchedGraphNode* node,
                     const SchedGraphNode* brNode,
@@ -1367,7 +1130,7 @@ NodeCanFillDelaySlot(const SchedulingManager& S,
 }
 
 
-void
+static void
 MarkNodeForDelaySlot(SchedulingManager& S,
                     SchedGraph* graph,
                     SchedGraphNode* node,
@@ -1391,10 +1154,173 @@ MarkNodeForDelaySlot(SchedulingManager& S,
 }
 
 
+void
+FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
+                                    SchedGraphNode* brNode,
+                                    vector<SchedGraphNode*>& sdelayNodeVec)
+{
+  const MachineInstrInfo& mii = S.getInstrInfo();
+  unsigned ndelays =
+    mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode());
+  
+  if (ndelays == 0)
+    return;
+  
+  sdelayNodeVec.reserve(ndelays);
+  
+  // Use a separate vector to hold the feasible multi-cycle nodes.
+  // These will be used if not enough single-cycle nodes are found.
+  // 
+  vector<SchedGraphNode*> mdelayNodeVec;
+  
+  for (sg_pred_iterator P = pred_begin(brNode);
+       P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
+    if (! (*P)->isDummyNode() &&
+       ! mii.isNop((*P)->getMachineInstr()->getOpCode()) &&
+       NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
+      {
+       if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1)
+         mdelayNodeVec.push_back(*P);
+       else
+         sdelayNodeVec.push_back(*P);
+      }
+  
+  // If not enough single-cycle instructions were found, select the
+  // lowest-latency multi-cycle instructions and use them.
+  // Note that this is the most efficient code when only 1 (or even 2)
+  // values need to be selected.
+  // 
+  while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
+    {
+      unsigned lmin =
+        mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode());
+      unsigned minIndex   = 0;
+      for (unsigned i=1; i < mdelayNodeVec.size(); i++)
+        {
+          unsigned li = 
+            mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode());
+          if (lmin >= li)
+            {
+              lmin = li;
+              minIndex = i;
+            }
+        }
+      sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
+      if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
+       mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
+    }
+}
+
+
+// Remove the NOPs currently in delay slots from the graph.
+// Mark instructions specified in sdelayNodeVec to replace them.
+// If not enough useful instructions were found, mark the NOPs to be used
+// for filling delay slots, otherwise, otherwise just discard them.
+// 
+void
+ReplaceNopsWithUsefulInstr(SchedulingManager& S,
+                           SchedGraphNode* node,
+                           vector<SchedGraphNode*> sdelayNodeVec,
+                           SchedGraph* graph)
+{
+  vector<SchedGraphNode*> nopNodeVec;
+  const MachineInstrInfo& mii = S.getInstrInfo();
+  unsigned ndelays= mii.getNumDelaySlots(node->getMachineInstr()->getOpCode());
+  assert(ndelays > 0 && "Unnecessary call to replace NOPs");
+  
+  // Remove the NOPs currently in delay slots from the graph.
+  // If not enough useful instructions were found, use the NOPs to
+  // fill delay slots, otherwise, just discard them.
+  for (sg_succ_iterator I=succ_begin(node); I != succ_end(node); ++I)
+    if (! (*I)->isDummyNode()
+       && mii.isNop((*I)->getMachineInstr()->getOpCode()))
+      {
+       if (sdelayNodeVec.size() < ndelays)
+         sdelayNodeVec.push_back(*I);
+       else
+         nopNodeVec.push_back(*I);
+      }
+  assert(sdelayNodeVec.size() == ndelays);
+  
+  // Mark the nodes chosen for delay slots.  This removes them from the graph.
+  for (unsigned i=0; i < sdelayNodeVec.size(); i++)
+    MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true);
+  
+  // And remove the unused NOPs from the graph.
+  for (unsigned i=0; i < nopNodeVec.size(); i++)
+    graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
+}
+
+
+// For all delayed instructions, choose instructions to put in the delay
+// slots and pull those out of the graph.  Mark them for the delay slots
+// in the DelaySlotInfo object for that graph node.  If no useful work
+// is found for a delay slot, use the NOP that is currently in that slot.
+// 
+// We try to fill the delay slots with useful work for all instructions
+// except CALLs.  For CALLs, it is nearly always possible to use one of the
+// call sequence instrs and putting anything else in the delay slot could be
+// suboptimal.
+// 
+static void
+ChooseInstructionsForDelaySlots(SchedulingManager& S,
+                               const BasicBlock* bb,
+                               SchedGraph* graph)
+{
+  const MachineInstrInfo& mii = S.getInstrInfo();
+  const TerminatorInst* termInstr = bb->getTerminator();
+  MachineCodeForVMInstr& termMvec = termInstr->getMachineInstrVec();
+  vector<SchedGraphNode*> delayNodeVec;
+  const MachineInstr* brInstr;
+  
+  assert(termInstr->getOpcode() != Instruction::Call
+         && "Call used as terminator?");
+  
+  // To find instructions that need delay slots without searching the entire
+  // machine code, we assume the only delayed instructions are CALLs or
+  // instructions generated for the terminator inst.
+  // Find the first branch instr in the sequence of machine instrs for term
+  // 
+  unsigned first = 0;
+  while (first < termMvec.size() &&
+         ! mii.isBranch(termMvec[first]->getOpCode()))
+    {
+      ++first;
+    }
+  assert(first < termMvec.size() &&
+        "No branch instructions for BR?  Ok, but weird!  Delete assertion.");
+  
+  brInstr = (first < termMvec.size())? termMvec[first] : NULL;
+  
+  // Compute a vector of the nodes chosen for delay slots and then
+  // mark delay slots to replace NOPs with these useful instructions.
+  // 
+  if (brInstr != NULL)
+    {
+      SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr);
+      FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec);
+      ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph);
+    }
+  
+  // Also mark delay slots for other delayed instructions to hold NOPs. 
+  // Simply passing in an empty delayNodeVec will have this effect.
+  // 
+  delayNodeVec.clear();
+  const MachineCodeForBasicBlock& bbMvec = bb->getMachineInstrVec();
+  for (unsigned i=0; i < bbMvec.size(); i++)
+    if (bbMvec[i] != brInstr &&
+        mii.getNumDelaySlots(bbMvec[i]->getOpCode()) > 0)
+      {
+        SchedGraphNode* node = graph->getGraphNodeForInstr(bbMvec[i]);
+        ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph);
+      }
+}
+
+
 // 
 // Schedule the delayed branch and its delay slots
 // 
-void
+unsigned
 DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
 {
   assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch");
@@ -1461,5 +1387,130 @@ DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
        S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime);
        break;
       }
+
+  return 1 + ndelays;
+}
+
+
+// Check if the instruction would conflict with instructions already
+// chosen for the current cycle
+// 
+static inline bool
+ConflictsWithChoices(const SchedulingManager& S,
+                    MachineOpCode opCode)
+{
+  // Check if the instruction must issue by itself, and some feasible
+  // choices have already been made for this cycle
+  if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
+    return true;
+  
+  // For each class that opCode belongs to, check if there are too many
+  // instructions of that class.
+  // 
+  const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
+  return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
 }
 
+
+//************************* External Functions *****************************/
+
+
+//---------------------------------------------------------------------------
+// Function: ViolatesMinimumGap
+// 
+// Purpose:
+//   Check minimum gap requirements relative to instructions scheduled in
+//   previous cycles.
+//   Note that we do not need to consider `nextEarliestIssueTime' here because
+//   that is also captured in the earliest start times for each opcode.
+//---------------------------------------------------------------------------
+
+static inline bool
+ViolatesMinimumGap(const SchedulingManager& S,
+                  MachineOpCode opCode,
+                  const cycles_t inCycle)
+{
+  return (inCycle < S.getEarliestStartTimeForOp(opCode));
+}
+
+
+//---------------------------------------------------------------------------
+// Function: instrIsFeasible
+// 
+// Purpose:
+//   Check if any issue restrictions would prevent the instruction from
+//   being issued in the current cycle
+//---------------------------------------------------------------------------
+
+bool
+instrIsFeasible(const SchedulingManager& S,
+               MachineOpCode opCode)
+{
+  // skip the instruction if it cannot be issued due to issue restrictions
+  // caused by previously issued instructions
+  if (ViolatesMinimumGap(S, opCode, S.getTime()))
+    return false;
+  
+  // skip the instruction if it cannot be issued due to issue restrictions
+  // caused by previously chosen instructions for the current cycle
+  if (ConflictsWithChoices(S, opCode))
+    return false;
+  
+  return true;
+}
+
+//---------------------------------------------------------------------------
+// Function: ScheduleInstructionsWithSSA
+// 
+// Purpose:
+//   Entry point for instruction scheduling on SSA form.
+//   Schedules the machine instructions generated by instruction selection.
+//   Assumes that register allocation has not been done, i.e., operands
+//   are still in SSA form.
+//---------------------------------------------------------------------------
+
+bool
+ScheduleInstructionsWithSSA(Method* method,
+                           const TargetMachine &target)
+{
+  SchedGraphSet graphSet(method, target);      
+  
+  if (SchedDebugLevel >= Sched_PrintSchedGraphs)
+    {
+      cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING"
+          << endl;
+      graphSet.dump();
+    }
+  
+  for (SchedGraphSet::const_iterator GI=graphSet.begin();
+       GI != graphSet.end(); ++GI)
+    {
+      SchedGraph* graph = (*GI).second;
+      const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks();
+      assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
+      const BasicBlock* bb = bbvec[0];
+      
+      if (SchedDebugLevel >= Sched_PrintSchedTrace)
+       cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
+      
+      SchedPriorities schedPrio(method, graph);             // expensive!
+      SchedulingManager S(target, graph, schedPrio);
+      
+      ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
+      
+      ForwardListSchedule(S);                       // computes schedule in S
+      
+      RecordSchedule((*GI).first, S);               // records schedule in BB
+    }
+  
+  if (SchedDebugLevel >= Sched_PrintMachineCode)
+    {
+      cout << endl
+          << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl;
+      PrintMachineInstructions(method);
+    }
+  
+  return false;                                         // no reason to fail yet
+}
+
+