Allow emission of names that start with an underscore. This is needed to
[oota-llvm.git] / lib / Target / SparcV9 / InstrSched / InstrScheduling.cpp
index 0b194207ca46aaf8ac2016ad60b52eed4a8844f3..a985680da3dd1dbce04b0c12d8c3cc162380edc5 100644 (file)
@@ -1,47 +1,40 @@
-// $Id$
-//***************************************************************************
-// File:
-//     InstrScheduling.cpp
-// 
-// Purpose:
-//     
-// History:
-//     7/23/01  -  Vikram Adve  -  Created
-//**************************************************************************/
-
-
-//************************* User Include Files *****************************/
+//===- InstrScheduling.cpp - Generic Instruction Scheduling support -------===//
+//
+// This file implements the llvm/CodeGen/InstrScheduling.h interface, along with
+// generic support routines for instruction scheduling.
+//
+//===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/InstrScheduling.h"
 #include "SchedPriorities.h"
-#include "llvm/Analysis/LiveVar/BBLiveVar.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Instruction.h"
-
-
-//************************ System Include Files *****************************/
-
-#include <hash_set>
+#include "llvm/CodeGen/MachineCodeForInstruction.h"
+#include "llvm/CodeGen/MachineCodeForBasicBlock.h"
+#include "llvm/CodeGen/MachineCodeForMethod.h"
+#include "llvm/Analysis/LiveVar/FunctionLiveVarInfo.h" // FIXME: Remove when modularized better
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/BasicBlock.h"
+#include "Support/CommandLine.h"
 #include <algorithm>
-#include <iterator>
-
+using std::cerr;
+using std::vector;
 
-//************************* External Data Types *****************************/
+SchedDebugLevel_t SchedDebugLevel;
 
-cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags,
-  "enable instruction scheduling debugging information",
-  clEnumValN(Sched_NoDebugInfo,      "n", "disable debug output"),
-  clEnumValN(Sched_PrintMachineCode, "y", "print machine code after scheduling"),
-  clEnumValN(Sched_PrintSchedTrace,  "t", "print trace of scheduling actions"),
-  clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 0);
+static cl::opt<SchedDebugLevel_t, true>
+SDL_opt("dsched", cl::Hidden, cl::location(SchedDebugLevel),
+        cl::desc("enable instruction scheduling debugging information"),
+        cl::values(
+ clEnumValN(Sched_NoDebugInfo,      "n", "disable debug output"),
+ clEnumValN(Sched_PrintMachineCode, "y", "print machine code after scheduling"),
+ clEnumValN(Sched_PrintSchedTrace,  "t", "print trace of scheduling actions"),
+ clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"),
+                   0));
 
 
 //************************* Internal Data Types *****************************/
 
 class InstrSchedule;
 class SchedulingManager;
-class DelaySlotInfo;
 
 
 //----------------------------------------------------------------------
@@ -84,7 +77,7 @@ private:
 //----------------------------------------------------------------------
 
 template<class _NodeType>
-class ScheduleIterator: public std::forward_iterator<_NodeType, ptrdiff_t> {
+class ScheduleIterator : public forward_iterator<_NodeType, ptrdiff_t> {
 private:
   unsigned cycleNum;
   unsigned slotNum;
@@ -163,7 +156,7 @@ public: // accessor functions to query chosen schedule
   }
   
   inline InstrGroup*   getIGroup       (cycles_t c) {
-    if (c >= groups.size())
+    if ((unsigned)c >= groups.size())
       groups.resize(c+1);
     if (groups[c] == NULL)
       groups[c] = new InstrGroup(nslots);
@@ -171,7 +164,7 @@ public: // accessor functions to query chosen schedule
   }
   
   inline const InstrGroup* getIGroup   (cycles_t c) const {
-    assert(c < groups.size());
+    assert((unsigned)c < groups.size());
     return groups[c];
   }
   
@@ -365,10 +358,14 @@ private:
                                                // indexed by branch node ptr 
   
 public:
-  /*ctor*/     SchedulingManager       (const TargetMachine& _target,
-                                        const SchedGraph* graph,
-                                        SchedPriorities& schedPrio);
-  /*dtor*/     ~SchedulingManager      () {}
+  SchedulingManager(const TargetMachine& _target, const SchedGraph* graph,
+                    SchedPriorities& schedPrio);
+  ~SchedulingManager() {
+    for (hash_map<const SchedGraphNode*,
+           DelaySlotInfo*>::iterator I = delaySlotInfoForBranches.begin(),
+           E = delaySlotInfoForBranches.end(); I != E; ++I)
+      delete I->second;
+  }
   
   //----------------------------------------------------------------------
   // Simplify access to the machine instruction info
@@ -432,7 +429,7 @@ public:
     // Append the instruction to the vector of choices for current cycle.
     // Increment numInClass[c] for the sched class to which the instr belongs.
     choiceVec.push_back(node);
-    const InstrSchedClass& sc = schedInfo.getSchedClass(node->getMachineInstr()->getOpCode());
+    const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpCode());
     assert(sc < (int) numInClass.size());
     numInClass[sc]++;
   }
@@ -486,7 +483,7 @@ public:
       choicesForSlot[s].erase(node);
     
     // and decrement the instr count for the sched class to which it belongs
-    const InstrSchedClass& sc = schedInfo.getSchedClass(node->getMachineInstr()->getOpCode());
+    const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpCode());
     assert(sc < (int) numInClass.size());
     numInClass[sc]--;
   }
@@ -498,30 +495,21 @@ public:
   inline DelaySlotInfo* getDelaySlotInfoForInstr(const SchedGraphNode* bn,
                                                 bool createIfMissing=false)
   {
-    DelaySlotInfo* dinfo;
-    hash_map<const SchedGraphNode*, DelaySlotInfo* >::const_iterator
+    hash_map<const SchedGraphNode*, DelaySlotInfo*>::const_iterator
       I = delaySlotInfoForBranches.find(bn);
-    if (I == delaySlotInfoForBranches.end())
-      {
-       if (createIfMissing)
-         {
-           dinfo = new DelaySlotInfo(bn,
-                                      getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode()));
-           delaySlotInfoForBranches[bn] = dinfo;
-         }
-       else
-         dinfo = NULL;
-      }
-    else
-      dinfo = (*I).second;
-    
-    return dinfo;
+    if (I != delaySlotInfoForBranches.end())
+      return I->second;
+
+    if (!createIfMissing) return 0;
+
+    DelaySlotInfo *dinfo =
+      new DelaySlotInfo(bn, getInstrInfo().getNumDelaySlots(bn->getOpCode()));
+    return delaySlotInfoForBranches[bn] = dinfo;
   }
   
 private:
-  /*ctor*/     SchedulingManager       ();     // Disable: DO NOT IMPLEMENT.
-  void         updateEarliestStartTimes(const SchedGraphNode* node,
-                                        cycles_t schedTime);
+  SchedulingManager();     // DISABLED: DO NOT IMPLEMENT
+  void updateEarliestStartTimes(const SchedGraphNode* node, cycles_t schedTime);
 };
 
 
@@ -554,25 +542,23 @@ void
 SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node,
                                            cycles_t schedTime)
 {
-  if (schedInfo.numBubblesAfter(node->getMachineInstr()->getOpCode()) > 0)
+  if (schedInfo.numBubblesAfter(node->getOpCode()) > 0)
     { // Update next earliest time before which *nothing* can issue.
-      nextEarliestIssueTime = max(nextEarliestIssueTime,
-                 curTime + 1 + schedInfo.numBubblesAfter(node->getMachineInstr()->getOpCode()));
+      nextEarliestIssueTime = std::max(nextEarliestIssueTime,
+                 curTime + 1 + schedInfo.numBubblesAfter(node->getOpCode()));
     }
   
-  const vector<MachineOpCode>*
-    conflictVec = schedInfo.getConflictList(node->getMachineInstr()->getOpCode());
+  const std::vector<MachineOpCode>&
+    conflictVec = schedInfo.getConflictList(node->getOpCode());
   
-  if (conflictVec != NULL)
-    for (unsigned i=0; i < conflictVec->size(); i++)
-      {
-       MachineOpCode toOp = (*conflictVec)[i];
-       cycles_t est = schedTime + schedInfo.getMinIssueGap(node->getMachineInstr()->getOpCode(),
-                                                           toOp);
-       assert(toOp < (int) nextEarliestStartTime.size());
-       if (nextEarliestStartTime[toOp] < est)
-         nextEarliestStartTime[toOp] = est;
-      }
+  for (unsigned i=0; i < conflictVec.size(); i++)
+    {
+      MachineOpCode toOp = conflictVec[i];
+      cycles_t est=schedTime + schedInfo.getMinIssueGap(node->getOpCode(),toOp);
+      assert(toOp < (int) nextEarliestStartTime.size());
+      if (nextEarliestStartTime[toOp] < est)
+        nextEarliestStartTime[toOp] = est;
+    }
 }
 
 //************************* Internal Functions *****************************/
@@ -607,7 +593,7 @@ AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
   unsigned numIssued;
   for (numIssued = 0; numIssued < maxIssue; numIssued++)
     {
-      int chosenSlot = -1, chosenNodeIndex = -1;
+      int chosenSlot = -1;
       for (unsigned s=startSlot; s < S.nslots; s++)
        if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
          {
@@ -645,7 +631,7 @@ AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
 static void
 RecordSchedule(const BasicBlock* bb, const SchedulingManager& S)
 {
-  MachineCodeForBasicBlock& mvec = bb->getMachineInstrVec();
+  MachineCodeForBasicBlock& mvec = MachineCodeForBasicBlock::get(bb);
   const MachineInstrInfo& mii = S.schedInfo.getInstrInfo();
   
 #ifndef NDEBUG
@@ -672,7 +658,6 @@ RecordSchedule(const BasicBlock* bb, const SchedulingManager& S)
   // Erase all except the dummy PHI instructions from mvec, and
   // pre-allocate create space for the ones we will put back in.
   mvec.erase(I, mvec.end());
-  mvec.reserve(mvec.size() + S.isched.getNumInstructions());
   
   InstrSchedule::const_iterator NIend = S.isched.end();
   for (InstrSchedule::const_iterator NI = S.isched.begin(); NI != NIend; ++NI)
@@ -753,7 +738,7 @@ FindSlotChoices(SchedulingManager& S,
       if (nextNode == NULL)
        break;                  // no more instructions for this cycle
       
-      if (S.getInstrInfo().getNumDelaySlots(nextNode->getMachineInstr()->getOpCode()) > 0)
+      if (S.getInstrInfo().getNumDelaySlots(nextNode->getOpCode()) > 0)
        {
          delaySlotInfo = S.getDelaySlotInfoForInstr(nextNode);
          if (delaySlotInfo != NULL)
@@ -766,7 +751,7 @@ FindSlotChoices(SchedulingManager& S,
                indexForDelayedInstr = S.getNumChoices();
            }
        }
-      else if (S.schedInfo.breaksIssueGroup(nextNode->getMachineInstr()->getOpCode()))
+      else if (S.schedInfo.breaksIssueGroup(nextNode->getOpCode()))
        {
          if (indexForBreakingNode < S.nslots)
            // have a breaking instruction already so throw this one away
@@ -776,15 +761,17 @@ FindSlotChoices(SchedulingManager& S,
        }
       
       if (nextNode != NULL)
-       S.addChoice(nextNode);
-      
-      if (S.schedInfo.isSingleIssue(nextNode->getMachineInstr()->getOpCode()))
-       {
-         assert(S.getNumChoices() == 1 &&
-                "Prioritizer returned invalid instr for this cycle!");
-         break;
-       }
+        {
+          S.addChoice(nextNode);
       
+          if (S.schedInfo.isSingleIssue(nextNode->getOpCode()))
+            {
+              assert(S.getNumChoices() == 1 &&
+                     "Prioritizer returned invalid instr for this cycle!");
+              break;
+            }
+        }
+          
       if (indexForDelayedInstr < S.nslots)
        break;                  // leave the rest for delay slots
     }
@@ -804,7 +791,7 @@ FindSlotChoices(SchedulingManager& S,
       
       if (S.getNumChoices() == 1)
        {
-         MachineOpCode opCode = S.getChoice(0)->getMachineInstr()->getOpCode();
+         MachineOpCode opCode = S.getChoice(0)->getOpCode();
          unsigned int s;
          for (s=startSlot; s < S.nslots; s++)
            if (S.schedInfo.instrCanUseSlot(opCode, s))
@@ -816,7 +803,7 @@ FindSlotChoices(SchedulingManager& S,
        {
          for (unsigned i=0; i < S.getNumChoices(); i++)
            {
-             MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
+             MachineOpCode opCode = S.getChoice(i)->getOpCode();
              for (unsigned int s=startSlot; s < S.nslots; s++)
                if (S.schedInfo.instrCanUseSlot(opCode, s))
                  S.addChoiceToSlot(s, S.getChoice(i));
@@ -836,7 +823,7 @@ FindSlotChoices(SchedulingManager& S,
       assert(delaySlotInfo != NULL && "No delay slot info for instr?");
       
       const SchedGraphNode* delayedNode = S.getChoice(indexForDelayedInstr);
-      MachineOpCode delayOpCode = delayedNode->getMachineInstr()->getOpCode();
+      MachineOpCode delayOpCode = delayedNode->getOpCode();
       unsigned ndelays= S.getInstrInfo().getNumDelaySlots(delayOpCode);
       
       unsigned delayedNodeSlot = S.nslots;
@@ -856,7 +843,7 @@ FindSlotChoices(SchedulingManager& S,
        {
          // Try to assign every other instruction to a lower numbered
          // slot than delayedNodeSlot.
-         MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
+         MachineOpCode opCode =S.getChoice(i)->getOpCode();
          bool noSlotFound = true;
          unsigned int s;
          for (s=startSlot; s < delayedNodeSlot; s++)
@@ -879,7 +866,7 @@ FindSlotChoices(SchedulingManager& S,
          
          assert(s < S.nslots && "No feasible slot for instruction?");
          
-         highestSlotUsed = max(highestSlotUsed, (int) s);
+         highestSlotUsed = std::max(highestSlotUsed, (int) s);
        }
       
       assert(highestSlotUsed <= (int) S.nslots-1 && "Invalid slot used?");
@@ -910,7 +897,7 @@ FindSlotChoices(SchedulingManager& S,
          
       // Find the last possible slot for this instruction.
       for (int s = S.nslots-1; s >= (int) startSlot; s--)
-       if (S.schedInfo.instrCanUseSlot(breakingNode->getMachineInstr()->getOpCode(), s))
+       if (S.schedInfo.instrCanUseSlot(breakingNode->getOpCode(), s))
          {
            breakingSlot = s;
            break;
@@ -924,7 +911,7 @@ FindSlotChoices(SchedulingManager& S,
       for (unsigned i=0;
           i < S.getNumChoices() && i < indexForBreakingNode; i++)
        {
-         MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
+         MachineOpCode opCode =S.getChoice(i)->getOpCode();
          
          // If a higher priority instruction cannot be assigned to
          // any earlier slots, don't schedule the breaking instruction.
@@ -963,8 +950,7 @@ FindSlotChoices(SchedulingManager& S,
       // Otherwise, just ignore the instruction.
       for (unsigned i=indexForBreakingNode+1; i < S.getNumChoices(); i++)
        {
-         bool foundLowerSlot = false;
-         MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
+         MachineOpCode opCode = S.getChoice(i)->getOpCode();
          for (unsigned int s=startSlot; s < nslotsToUse; s++)
            if (S.schedInfo.instrCanUseSlot(opCode, s))
              S.addChoiceToSlot(s, S.getChoice(i));
@@ -1003,15 +989,15 @@ ChooseOneGroup(SchedulingManager& S)
     {
       for (cycles_t c = firstCycle; c <= S.getTime(); c++)
         {
-          cout << "    Cycle " << c << " : Scheduled instructions:\n";
+          cerr << "    Cycle " << (long)c << " : Scheduled instructions:\n";
           const InstrGroup* igroup = S.isched.getIGroup(c);
           for (unsigned int s=0; s < S.nslots; s++)
             {
-              cout << "        ";
+              cerr << "        ";
               if ((*igroup)[s] != NULL)
-                cout << * ((*igroup)[s])->getMachineInstr() << endl;
+                cerr << * ((*igroup)[s])->getMachineInstr() << "\n";
               else
-                cout << "<none>" << endl;
+                cerr << "<none>\n";
             }
         }
     }
@@ -1058,9 +1044,9 @@ ForwardListSchedule(SchedulingManager& S)
       // an instruction can be issued, or the next earliest in which
       // one will be ready, or to the next cycle, whichever is latest.
       // 
-      S.updateTime(max(S.getTime() + 1,
-                      max(S.getEarliestIssueTime(),
-                          S.schedPrio.getEarliestReadyTime())));
+      S.updateTime(std::max(S.getTime() + 1,
+                            std::max(S.getEarliestIssueTime(),
+                                     S.schedPrio.getEarliestReadyTime())));
     }
 }
 
@@ -1083,11 +1069,11 @@ NodeCanFillDelaySlot(const SchedulingManager& S,
   assert(! node->isDummyNode());
   
   // don't put a branch in the delay slot of another branch
-  if (S.getInstrInfo().isBranch(node->getMachineInstr()->getOpCode()))
+  if (S.getInstrInfo().isBranch(node->getOpCode()))
     return false;
   
   // don't put a single-issue instruction in the delay slot of a branch
-  if (S.schedInfo.isSingleIssue(node->getMachineInstr()->getOpCode()))
+  if (S.schedInfo.isSingleIssue(node->getOpCode()))
     return false;
   
   // don't put a load-use dependence in the delay slot of a branch
@@ -1096,13 +1082,13 @@ NodeCanFillDelaySlot(const SchedulingManager& S,
   for (SchedGraphNode::const_iterator EI = node->beginInEdges();
        EI != node->endInEdges(); ++EI)
     if (! (*EI)->getSrc()->isDummyNode()
-       && mii.isLoad((*EI)->getSrc()->getMachineInstr()->getOpCode())
+       && mii.isLoad((*EI)->getSrc()->getOpCode())
        && (*EI)->getDepType() == SchedGraphEdge::CtrlDep)
       return false;
   
   // for now, don't put an instruction that does not have operand
   // interlocks in the delay slot of a branch
-  if (! S.getInstrInfo().hasOperandInterlock(node->getMachineInstr()->getOpCode()))
+  if (! S.getInstrInfo().hasOperandInterlock(node->getOpCode()))
     return false;
   
   // Finally, if the instruction preceeds the branch, we make sure the
@@ -1161,7 +1147,7 @@ FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
 {
   const MachineInstrInfo& mii = S.getInstrInfo();
   unsigned ndelays =
-    mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode());
+    mii.getNumDelaySlots(brNode->getOpCode());
   
   if (ndelays == 0)
     return;
@@ -1176,10 +1162,10 @@ FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
   for (sg_pred_iterator P = pred_begin(brNode);
        P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
     if (! (*P)->isDummyNode() &&
-       ! mii.isNop((*P)->getMachineInstr()->getOpCode()) &&
+       ! mii.isNop((*P)->getOpCode()) &&
        NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
       {
-       if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1)
+       if (mii.maxLatency((*P)->getOpCode()) > 1)
          mdelayNodeVec.push_back(*P);
        else
          sdelayNodeVec.push_back(*P);
@@ -1193,12 +1179,12 @@ FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
   while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
     {
       unsigned lmin =
-        mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode());
+        mii.maxLatency(mdelayNodeVec[0]->getOpCode());
       unsigned minIndex   = 0;
       for (unsigned i=1; i < mdelayNodeVec.size(); i++)
         {
           unsigned li = 
-            mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode());
+            mii.maxLatency(mdelayNodeVec[i]->getOpCode());
           if (lmin >= li)
             {
               lmin = li;
@@ -1223,24 +1209,55 @@ ReplaceNopsWithUsefulInstr(SchedulingManager& S,
                            vector<SchedGraphNode*> sdelayNodeVec,
                            SchedGraph* graph)
 {
-  vector<SchedGraphNode*> nopNodeVec;
+  vector<SchedGraphNode*> nopNodeVec;   // this will hold unused NOPs
   const MachineInstrInfo& mii = S.getInstrInfo();
-  unsigned ndelays= mii.getNumDelaySlots(node->getMachineInstr()->getOpCode());
+  const MachineInstr* brInstr = node->getMachineInstr();
+  unsigned ndelays= mii.getNumDelaySlots(brInstr->getOpCode());
   assert(ndelays > 0 && "Unnecessary call to replace NOPs");
   
   // Remove the NOPs currently in delay slots from the graph.
   // If not enough useful instructions were found, use the NOPs to
   // fill delay slots, otherwise, just discard them.
-  for (sg_succ_iterator I=succ_begin(node); I != succ_end(node); ++I)
-    if (! (*I)->isDummyNode()
-       && mii.isNop((*I)->getMachineInstr()->getOpCode()))
-      {
-       if (sdelayNodeVec.size() < ndelays)
-         sdelayNodeVec.push_back(*I);
-       else
-         nopNodeVec.push_back(*I);
-      }
-  assert(sdelayNodeVec.size() == ndelays);
+  //  
+  unsigned int firstDelaySlotIdx = node->getOrigIndexInBB() + 1;
+  MachineCodeForBasicBlock& bbMvec = MachineCodeForBasicBlock::get(node->getBB());
+  assert(bbMvec[firstDelaySlotIdx - 1] == brInstr &&
+         "Incorrect instr. index in basic block for brInstr");
+  
+  // First find all useful instructions already in the delay slots
+  // and USE THEM.  We'll throw away the unused alternatives below
+  // 
+  for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx + ndelays; ++i)
+    if (! mii.isNop(bbMvec[i]->getOpCode()))
+      sdelayNodeVec.insert(sdelayNodeVec.begin(),
+                           graph->getGraphNodeForInstr(bbMvec[i]));
+  
+  // Then find the NOPs and keep only as many as are needed.
+  // Put the rest in nopNodeVec to be deleted.
+  for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx + ndelays; ++i)
+    if (mii.isNop(bbMvec[i]->getOpCode()))
+      if (sdelayNodeVec.size() < ndelays)
+        sdelayNodeVec.push_back(graph->getGraphNodeForInstr(bbMvec[i]));
+      else
+       {
+         nopNodeVec.push_back(graph->getGraphNodeForInstr(bbMvec[i]));
+         
+         //remove the MI from the Machine Code For Instruction
+         MachineCodeForInstruction& llvmMvec = 
+           MachineCodeForInstruction::get((Instruction *)
+                                          (node->getBB()->getTerminator()));
+         for(MachineCodeForInstruction::iterator mciI=llvmMvec.begin(), 
+               mciE=llvmMvec.end(); mciI!=mciE; ++mciI){
+           if(*mciI==bbMvec[i])
+             llvmMvec.erase(mciI);
+         }
+       }
+
+  assert(sdelayNodeVec.size() >= ndelays);
+  
+  // If some delay slots were already filled, throw away that many new choices
+  if (sdelayNodeVec.size() > ndelays)
+    sdelayNodeVec.resize(ndelays);
   
   // Mark the nodes chosen for delay slots.  This removes them from the graph.
   for (unsigned i=0; i < sdelayNodeVec.size(); i++)
@@ -1258,55 +1275,57 @@ ReplaceNopsWithUsefulInstr(SchedulingManager& S,
 // is found for a delay slot, use the NOP that is currently in that slot.
 // 
 // We try to fill the delay slots with useful work for all instructions
-// except CALLs.  For CALLs, it is nearly always possible to use one of the
+// EXCEPT CALLS AND RETURNS.
+// For CALLs and RETURNs, it is nearly always possible to use one of the
 // call sequence instrs and putting anything else in the delay slot could be
-// suboptimal.
+// suboptimal.  Also, it complicates generating the calling sequence code in
+// regalloc.
 // 
 static void
 ChooseInstructionsForDelaySlots(SchedulingManager& S,
-                               const BasicBlockbb,
-                               SchedGraphgraph)
+                               const BasicBlock *bb,
+                               SchedGraph *graph)
 {
   const MachineInstrInfo& mii = S.getInstrInfo();
-  const TerminatorInst* termInstr = bb->getTerminator();
-  MachineCodeForVMInstr& termMvec = termInstr->getMachineInstrVec();
+  const Instruction *termInstr = (Instruction*)bb->getTerminator();
+  MachineCodeForInstruction &termMvec=MachineCodeForInstruction::get(termInstr);
   vector<SchedGraphNode*> delayNodeVec;
-  const MachineInstr* brInstr;
+  const MachineInstr* brInstr = NULL;
   
-  assert(termInstr->getOpcode() != Instruction::Call
-         && "Call used as terminator?");
-  
-  // To find instructions that need delay slots without searching the entire
-  // machine code, we assume the only delayed instructions are CALLs or
-  // instructions generated for the terminator inst.
-  // Find the first branch instr in the sequence of machine instrs for term
-  // 
-  unsigned first = 0;
-  while (first < termMvec.size() &&
-         ! mii.isBranch(termMvec[first]->getOpCode()))
-    {
-      ++first;
-    }
-  assert(first < termMvec.size() &&
-        "No branch instructions for BR?  Ok, but weird!  Delete assertion.");
-  
-  brInstr = (first < termMvec.size())? termMvec[first] : NULL;
-  
-  // Compute a vector of the nodes chosen for delay slots and then
-  // mark delay slots to replace NOPs with these useful instructions.
-  // 
-  if (brInstr != NULL)
+  if (termInstr->getOpcode() != Instruction::Ret)
     {
-      SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr);
-      FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec);
-      ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph);
+      // To find instructions that need delay slots without searching the full
+      // machine code, we assume that the only delayed instructions are CALLs
+      // or instructions generated for the terminator inst.
+      // Find the first branch instr in the sequence of machine instrs for term
+      // 
+      unsigned first = 0;
+      while (first < termMvec.size() &&
+             ! mii.isBranch(termMvec[first]->getOpCode()))
+        {
+          ++first;
+        }
+      assert(first < termMvec.size() &&
+         "No branch instructions for BR?  Ok, but weird!  Delete assertion.");
+      
+      brInstr = (first < termMvec.size())? termMvec[first] : NULL;
+      
+      // Compute a vector of the nodes chosen for delay slots and then
+      // mark delay slots to replace NOPs with these useful instructions.
+      // 
+      if (brInstr != NULL)
+        {
+          SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr);
+          FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec);
+          ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph);
+        }
     }
   
   // Also mark delay slots for other delayed instructions to hold NOPs. 
   // Simply passing in an empty delayNodeVec will have this effect.
   // 
   delayNodeVec.clear();
-  const MachineCodeForBasicBlock& bbMvec = bb->getMachineInstrVec();
+  const MachineCodeForBasicBlock& bbMvec = MachineCodeForBasicBlock::get(bb);
   for (unsigned i=0; i < bbMvec.size(); i++)
     if (bbMvec[i] != brInstr &&
         mii.getNumDelaySlots(bbMvec[i]->getOpCode()) > 0)
@@ -1350,10 +1369,10 @@ DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
        {
          const SchedGraphNode* dnode = delayNodeVec[i];
          if ( ! S.isScheduled(dnode)
-              && S.schedInfo.instrCanUseSlot(dnode->getMachineInstr()->getOpCode(), nextSlot)
-              && instrIsFeasible(S, dnode->getMachineInstr()->getOpCode()))
+              && S.schedInfo.instrCanUseSlot(dnode->getOpCode(), nextSlot)
+              && instrIsFeasible(S, dnode->getOpCode()))
            {
-             assert(S.getInstrInfo().hasOperandInterlock(dnode->getMachineInstr()->getOpCode())
+             assert(S.getInstrInfo().hasOperandInterlock(dnode->getOpCode())
                     && "Instructions without interlocks not yet supported "
                     "when filling branch delay slots");
              S.scheduleInstr(dnode, nextSlot, nextTime);
@@ -1469,48 +1488,66 @@ instrIsFeasible(const SchedulingManager& S,
 //   are still in SSA form.
 //---------------------------------------------------------------------------
 
-bool
-ScheduleInstructionsWithSSA(Method* method,
-                           const TargetMachine &target)
+namespace {
+  class InstructionSchedulingWithSSA : public FunctionPass {
+    const TargetMachine &target;
+  public:
+    inline InstructionSchedulingWithSSA(const TargetMachine &T) : target(T) {}
+
+    const char *getPassName() const { return "Instruction Scheduling"; }
+  
+    // getAnalysisUsage - We use LiveVarInfo...
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<FunctionLiveVarInfo>();
+    }
+    
+    bool runOnFunction(Function &F);
+  };
+} // end anonymous namespace
+
+
+bool InstructionSchedulingWithSSA::runOnFunction(Function &F)
 {
-  SchedGraphSet graphSet(method, target);      
+  SchedGraphSet graphSet(&F, target);  
   
   if (SchedDebugLevel >= Sched_PrintSchedGraphs)
     {
-      cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING"
-          << endl;
+      cerr << "\n*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING\n";
       graphSet.dump();
     }
   
-  for (SchedGraphSet::const_iterator GI=graphSet.begin();
-       GI != graphSet.end(); ++GI)
+  for (SchedGraphSet::const_iterator GI=graphSet.begin(), GE=graphSet.end();
+       GI != GE; ++GI)
     {
-      SchedGraph* graph = (*GI).second;
-      const vector<const BasicBlock*>bbvec = graph->getBasicBlocks();
+      SchedGraph* graph = (*GI);
+      const vector<const BasicBlock*> &bbvec = graph->getBasicBlocks();
       assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
       const BasicBlock* bb = bbvec[0];
       
       if (SchedDebugLevel >= Sched_PrintSchedTrace)
-       cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
+        cerr << "\n*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
       
-      SchedPriorities schedPrio(method, graph);             // expensive!
+      // expensive!
+      SchedPriorities schedPrio(&F, graph,getAnalysis<FunctionLiveVarInfo>());
       SchedulingManager S(target, graph, schedPrio);
-      
+          
       ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
       
-      ForwardListSchedule(S);                       // computes schedule in S
+      ForwardListSchedule(S);               // computes schedule in S
       
-      RecordSchedule((*GI).first, S);               // records schedule in BB
+      RecordSchedule(bb, S);                // records schedule in BB
     }
   
   if (SchedDebugLevel >= Sched_PrintMachineCode)
     {
-      cout << endl
-          << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl;
-      PrintMachineInstructions(method);
+      cerr << "\n*** Machine instructions after INSTRUCTION SCHEDULING\n";
+      MachineCodeForMethod::get(&F).dump();
     }
   
-  return false;                                         // no reason to fail yet
+  return false;
 }
 
 
+Pass *createInstructionSchedulingWithSSAPass(const TargetMachine &tgt) {
+  return new InstructionSchedulingWithSSA(tgt);
+}