Get rid of 3 of the 4 'printimplicit' flags. Implicit operands are now

[oota-llvm.git] / lib / CodeGen / InstrSched / InstrScheduling.cpp
diff --git a/lib/CodeGen/InstrSched/InstrScheduling.cpp b/lib/CodeGen/InstrSched/InstrScheduling.cpp

index ae19a0635e5e668b0764e34f5bc27fc8b44e17c8..240bdefe94e72fdd263db4868a1b0333d33ca6c7 100644 (file)
--- a/lib/CodeGen/InstrSched/InstrScheduling.cpp
+++ b/lib/CodeGen/InstrSched/InstrScheduling.cpp
@@ -1,4 +1,11 @@
  //===- InstrScheduling.cpp - Generic Instruction Scheduling support -------===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
  //
  // This file implements the llvm/CodeGen/InstrScheduling.h interface, along with
  // generic support routines for instruction scheduling.
@@ -9,14 +16,20 @@
  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineCodeForInstruction.h"
  #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/FunctionLiveVarInfo.h"
+#include "../../Target/SparcV9/LiveVar/FunctionLiveVarInfo.h"
  #include "llvm/Target/TargetMachine.h"
  #include "llvm/BasicBlock.h"
  #include "Support/CommandLine.h"
  #include <algorithm>
+#include <iostream>
+
+namespace llvm {
  
  SchedDebugLevel_t SchedDebugLevel;
  
+static cl::opt<bool> EnableFillingDelaySlots("sched-fill-delay-slots",
+              cl::desc("Fill branch delay slots during local scheduling"));
+
  static cl::opt<SchedDebugLevel_t, true>
  SDL_opt("dsched", cl::Hidden, cl::location(SchedDebugLevel),
          cl::desc("enable instruction scheduling debugging information"),
@@ -25,7 +38,7 @@ SDL_opt("dsched", cl::Hidden, cl::location(SchedDebugLevel),
   clEnumValN(Sched_PrintMachineCode, "y", "print machine code after scheduling"),
   clEnumValN(Sched_PrintSchedTrace,  "t", "print trace of scheduling actions"),
   clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"),
-                   0));
+                   clEnumValEnd));
  
  
  //************************* Internal Data Types *****************************/
@@ -101,10 +114,7 @@ public:
    
    inline bool operator!=(const _Self& x) const { return !operator==(x); }
    
-  inline _NodeType* operator*() const {
-    assert(cycleNum < S.groups.size());
-    return (*S.groups[cycleNum])[slotNum];
-  }
+  inline _NodeType* operator*() const;
    inline _NodeType* operator->() const { return operator*(); }
    
           _Self& operator++();                          // Preincrement
@@ -140,10 +150,10 @@ public: // iterators
    typedef ScheduleIterator<SchedGraphNode> iterator;
    typedef ScheduleIterator<const SchedGraphNode> const_iterator;
    
-        iterator begin();
-  const_iterator begin() const;
-        iterator end();
-  const_iterator end()   const;
+        iterator begin()       { return iterator::begin(*this); }
+  const_iterator begin() const { return const_iterator::begin(*this); }
+        iterator end()         { return iterator::end(*this); }
+  const_iterator end() const   { return const_iterator::end(*this); }
    
  public: // constructors and destructor
    /*ctor*/             InstrSchedule   (unsigned int _nslots,
@@ -183,7 +193,10 @@ public: // accessor functions to query chosen schedule
                                          unsigned int slotNum,
                                          cycles_t cycle) {
      InstrGroup* igroup = this->getIGroup(cycle);
-    assert((*igroup)[slotNum] == NULL &&  "Slot already filled?");
+    if (!((*igroup)[slotNum] == NULL)) {
+      std::cerr << "Slot already filled?\n";
+      abort();
+    }
      igroup->addInstr(node, slotNum);
      assert(node->getNodeId() < startTime.size());
      startTime[node->getNodeId()] = cycle;
@@ -191,11 +204,17 @@ public: // accessor functions to query chosen schedule
    }
    
  private:
-  friend class iterator;
-  friend class const_iterator;
+  friend class ScheduleIterator<SchedGraphNode>;
+  friend class ScheduleIterator<const SchedGraphNode>;
    /*ctor*/     InstrSchedule   ();     // Disable: DO NOT IMPLEMENT.
  };
  
+template<class NodeType>
+inline NodeType *ScheduleIterator<NodeType>::operator*() const {
+  assert(cycleNum < S.groups.size());
+  return (*S.groups[cycleNum])[slotNum];
+}
+
  
  /*ctor*/
  InstrSchedule::InstrSchedule(unsigned int _nslots, unsigned int _numNodes)
@@ -265,30 +284,6 @@ ScheduleIterator<_NodeType>::end(const InstrSchedule& _schedule)
    return _Self(_schedule, _schedule.groups.size(), 0);
  }
  
-InstrSchedule::iterator
-InstrSchedule::begin()
-{
-  return iterator::begin(*this);
-}
-
-InstrSchedule::const_iterator
-InstrSchedule::begin() const
-{
-  return const_iterator::begin(*this);
-}
-
-InstrSchedule::iterator
-InstrSchedule::end()
-{
-  return iterator::end(*this);
-}
-
-InstrSchedule::const_iterator
-InstrSchedule::end() const
-{
-  return const_iterator::end(  *this);
-}
-
  
  //----------------------------------------------------------------------
  // class DelaySlotInfo:
@@ -433,7 +428,7 @@ public:
      // Append the instruction to the vector of choices for current cycle.
      // Increment numInClass[c] for the sched class to which the instr belongs.
      choiceVec.push_back(node);
-    const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpCode());
+    const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpcode());
      assert(sc < numInClass.size());
      numInClass[sc]++;
    }
@@ -487,7 +482,7 @@ public:
        choicesForSlot[s].erase(node);
      
      // and decrement the instr count for the sched class to which it belongs
-    const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpCode());
+    const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpcode());
      assert(sc < numInClass.size());
      numInClass[sc]--;
    }
@@ -507,7 +502,7 @@ public:
      if (!createIfMissing) return 0;
  
      DelaySlotInfo *dinfo =
-      new DelaySlotInfo(bn, getInstrInfo().getNumDelaySlots(bn->getOpCode()));
+      new DelaySlotInfo(bn, getInstrInfo().getNumDelaySlots(bn->getOpcode()));
      return delaySlotInfoForBranches[bn] = dinfo;
    }
    
@@ -521,15 +516,15 @@ private:
  SchedulingManager::SchedulingManager(const TargetMachine& target,
                                      const SchedGraph* graph,
                                      SchedPriorities& _schedPrio)
-  : nslots(target.getSchedInfo().getMaxNumIssueTotal()),
-    schedInfo(target.getSchedInfo()),
+  : nslots(target.getSchedInfo()->getMaxNumIssueTotal()),
+    schedInfo(*target.getSchedInfo()),
      schedPrio(_schedPrio),
      isched(nslots, graph->getNumNodes()),
      totalInstrCount(graph->getNumNodes() - 2),
      nextEarliestIssueTime(0),
      choicesForSlot(nslots),
-    numInClass(target.getSchedInfo().getNumSchedClasses(), 0), // set all to 0
-    nextEarliestStartTime(target.getInstrInfo().getNumRealOpCodes(),
+    numInClass(target.getSchedInfo()->getNumSchedClasses(), 0),        // set all to 0
+    nextEarliestStartTime(target.getInstrInfo()->getNumOpcodes(),
                           (cycles_t) 0)                         // set all to 0
  {
    updateTime(0);
@@ -546,19 +541,19 @@ void
  SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node,
                                             cycles_t schedTime)
  {
-  if (schedInfo.numBubblesAfter(node->getOpCode()) > 0)
+  if (schedInfo.numBubblesAfter(node->getOpcode()) > 0)
      { // Update next earliest time before which *nothing* can issue.
        nextEarliestIssueTime = std::max(nextEarliestIssueTime,
-                 curTime + 1 + schedInfo.numBubblesAfter(node->getOpCode()));
+                 curTime + 1 + schedInfo.numBubblesAfter(node->getOpcode()));
      }
    
    const std::vector<MachineOpCode>&
-    conflictVec = schedInfo.getConflictList(node->getOpCode());
+    conflictVec = schedInfo.getConflictList(node->getOpcode());
    
    for (unsigned i=0; i < conflictVec.size(); i++)
      {
        MachineOpCode toOp = conflictVec[i];
-      cycles_t est=schedTime + schedInfo.getMinIssueGap(node->getOpCode(),toOp);
+      cycles_t est=schedTime + schedInfo.getMinIssueGap(node->getOpcode(),toOp);
        assert(toOp < (int) nextEarliestStartTime.size());
        if (nextEarliestStartTime[toOp] < est)
          nextEarliestStartTime[toOp] = est;
@@ -634,17 +629,15 @@ RecordSchedule(MachineBasicBlock &MBB, const SchedulingManager& S)
  {
    const TargetInstrInfo& mii = S.schedInfo.getInstrInfo();
    
-#ifndef NDEBUG
    // Lets make sure we didn't lose any instructions, except possibly
    // some NOPs from delay slots.  Also, PHIs are not included in the schedule.
    unsigned numInstr = 0;
    for (MachineBasicBlock::iterator I=MBB.begin(); I != MBB.end(); ++I)
-    if (! mii.isNop((*I)->getOpCode()) &&
-       ! mii.isDummyPhiInstr((*I)->getOpCode()))
+    if (! mii.isNop(I->getOpcode()) &&
+       ! mii.isDummyPhiInstr(I->getOpcode()))
        ++numInstr;
    assert(S.isched.getNumInstructions() >= numInstr &&
          "Lost some non-NOP instructions during scheduling!");
-#endif
    
    if (S.isched.getNumInstructions() == 0)
      return;                            // empty basic block!
@@ -652,12 +645,13 @@ RecordSchedule(MachineBasicBlock &MBB, const SchedulingManager& S)
    // First find the dummy instructions at the start of the basic block
    MachineBasicBlock::iterator I = MBB.begin();
    for ( ; I != MBB.end(); ++I)
-    if (! mii.isDummyPhiInstr((*I)->getOpCode()))
+    if (! mii.isDummyPhiInstr(I->getOpcode()))
        break;
    
-  // Erase all except the dummy PHI instructions from MBB, and
+  // Remove all except the dummy PHI instructions from MBB, and
    // pre-allocate create space for the ones we will put back in.
-  MBB.erase(I, MBB.end());
+  while (I != MBB.end())
+    MBB.remove(I++);
    
    InstrSchedule::const_iterator NIend = S.isched.end();
    for (InstrSchedule::const_iterator NI = S.isched.begin(); NI != NIend; ++NI)
@@ -735,7 +729,7 @@ FindSlotChoices(SchedulingManager& S,
      if (nextNode == NULL)
        break;                   // no more instructions for this cycle
        
-    if (S.getInstrInfo().getNumDelaySlots(nextNode->getOpCode()) > 0) {
+    if (S.getInstrInfo().getNumDelaySlots(nextNode->getOpcode()) > 0) {
        delaySlotInfo = S.getDelaySlotInfoForInstr(nextNode);
        if (delaySlotInfo != NULL) {
          if (indexForBreakingNode < S.nslots)
@@ -745,7 +739,7 @@ FindSlotChoices(SchedulingManager& S,
          else
            indexForDelayedInstr = S.getNumChoices();
        }
-    } else if (S.schedInfo.breaksIssueGroup(nextNode->getOpCode())) {
+    } else if (S.schedInfo.breaksIssueGroup(nextNode->getOpcode())) {
        if (indexForBreakingNode < S.nslots)
          // have a breaking instruction already so throw this one away
          nextNode = NULL;
@@ -756,7 +750,7 @@ FindSlotChoices(SchedulingManager& S,
      if (nextNode != NULL) {
        S.addChoice(nextNode);
        
-      if (S.schedInfo.isSingleIssue(nextNode->getOpCode())) {
+      if (S.schedInfo.isSingleIssue(nextNode->getOpcode())) {
          assert(S.getNumChoices() == 1 &&
                 "Prioritizer returned invalid instr for this cycle!");
          break;
@@ -781,7 +775,7 @@ FindSlotChoices(SchedulingManager& S,
      // This is the common case, so handle it separately for efficiency.
        
      if (S.getNumChoices() == 1) {
-      MachineOpCode opCode = S.getChoice(0)->getOpCode();
+      MachineOpCode opCode = S.getChoice(0)->getOpcode();
        unsigned int s;
        for (s=startSlot; s < S.nslots; s++)
          if (S.schedInfo.instrCanUseSlot(opCode, s))
@@ -790,7 +784,7 @@ FindSlotChoices(SchedulingManager& S,
        S.addChoiceToSlot(s, S.getChoice(0));
      } else {
        for (unsigned i=0; i < S.getNumChoices(); i++) {
-        MachineOpCode opCode = S.getChoice(i)->getOpCode();
+        MachineOpCode opCode = S.getChoice(i)->getOpcode();
          for (unsigned int s=startSlot; s < S.nslots; s++)
            if (S.schedInfo.instrCanUseSlot(opCode, s))
              S.addChoiceToSlot(s, S.getChoice(i));
@@ -808,7 +802,7 @@ FindSlotChoices(SchedulingManager& S,
      assert(delaySlotInfo != NULL && "No delay slot info for instr?");
        
      const SchedGraphNode* delayedNode = S.getChoice(indexForDelayedInstr);
-    MachineOpCode delayOpCode = delayedNode->getOpCode();
+    MachineOpCode delayOpCode = delayedNode->getOpcode();
      unsigned ndelays= S.getInstrInfo().getNumDelaySlots(delayOpCode);
        
      unsigned delayedNodeSlot = S.nslots;
@@ -826,7 +820,7 @@ FindSlotChoices(SchedulingManager& S,
      for (unsigned i=0; i < S.getNumChoices() - 1; i++) {
        // Try to assign every other instruction to a lower numbered
        // slot than delayedNodeSlot.
-      MachineOpCode opCode =S.getChoice(i)->getOpCode();
+      MachineOpCode opCode =S.getChoice(i)->getOpcode();
        bool noSlotFound = true;
        unsigned int s;
        for (s=startSlot; s < delayedNodeSlot; s++)
@@ -876,7 +870,7 @@ FindSlotChoices(SchedulingManager& S,
           
      // Find the last possible slot for this instruction.
      for (int s = S.nslots-1; s >= (int) startSlot; s--)
-      if (S.schedInfo.instrCanUseSlot(breakingNode->getOpCode(), s)) {
+      if (S.schedInfo.instrCanUseSlot(breakingNode->getOpcode(), s)) {
          breakingSlot = s;
          break;
        }
@@ -889,7 +883,7 @@ FindSlotChoices(SchedulingManager& S,
      for (unsigned i=0;
           i < S.getNumChoices() && i < indexForBreakingNode; i++)
      {
-      MachineOpCode opCode =S.getChoice(i)->getOpCode();
+      MachineOpCode opCode =S.getChoice(i)->getOpcode();
           
        // If a higher priority instruction cannot be assigned to
        // any earlier slots, don't schedule the breaking instruction.
@@ -923,7 +917,7 @@ FindSlotChoices(SchedulingManager& S,
      // group, only assign them to slots lower than the breaking slot.
      // Otherwise, just ignore the instruction.
      for (unsigned i=indexForBreakingNode+1; i < S.getNumChoices(); i++) {
-      MachineOpCode opCode = S.getChoice(i)->getOpCode();
+      MachineOpCode opCode = S.getChoice(i)->getOpcode();
        for (unsigned int s=startSlot; s < nslotsToUse; s++)
          if (S.schedInfo.instrCanUseSlot(opCode, s))
            S.addChoiceToSlot(s, S.getChoice(i));
@@ -1035,11 +1029,11 @@ NodeCanFillDelaySlot(const SchedulingManager& S,
    assert(! node->isDummyNode());
    
    // don't put a branch in the delay slot of another branch
-  if (S.getInstrInfo().isBranch(node->getOpCode()))
+  if (S.getInstrInfo().isBranch(node->getOpcode()))
      return false;
    
    // don't put a single-issue instruction in the delay slot of a branch
-  if (S.schedInfo.isSingleIssue(node->getOpCode()))
+  if (S.schedInfo.isSingleIssue(node->getOpcode()))
      return false;
    
    // don't put a load-use dependence in the delay slot of a branch
@@ -1047,17 +1041,12 @@ NodeCanFillDelaySlot(const SchedulingManager& S,
    
    for (SchedGraphNode::const_iterator EI = node->beginInEdges();
         EI != node->endInEdges(); ++EI)
-    if (! (*EI)->getSrc()->isDummyNode()
-       && mii.isLoad((*EI)->getSrc()->getOpCode())
+    if (! ((SchedGraphNode*)(*EI)->getSrc())->isDummyNode()
+       && mii.isLoad(((SchedGraphNode*)(*EI)->getSrc())->getOpcode())
         && (*EI)->getDepType() == SchedGraphEdge::CtrlDep)
        return false;
    
-  // for now, don't put an instruction that does not have operand
-  // interlocks in the delay slot of a branch
-  if (! S.getInstrInfo().hasOperandInterlock(node->getOpCode()))
-    return false;
-  
-  // Finally, if the instruction preceeds the branch, we make sure the
+  // Finally, if the instruction precedes the branch, we make sure the
    // instruction can be reordered relative to the branch.  We simply check
    // if the instr. has only 1 outgoing edge, viz., a CD edge to the branch.
    // 
@@ -1065,7 +1054,7 @@ NodeCanFillDelaySlot(const SchedulingManager& S,
      bool onlyCDEdgeToBranch = true;
      for (SchedGraphNode::const_iterator OEI = node->beginOutEdges();
           OEI != node->endOutEdges(); ++OEI)
-      if (! (*OEI)->getSink()->isDummyNode()
+      if (! ((SchedGraphNode*)(*OEI)->getSink())->isDummyNode()
            && ((*OEI)->getSink() != brNode
                || (*OEI)->getDepType() != SchedGraphEdge::CtrlDep))
        {
@@ -1089,7 +1078,7 @@ MarkNodeForDelaySlot(SchedulingManager& S,
                      bool nodeIsPredecessor)
  {
    if (nodeIsPredecessor) {
-    // If node is in the same basic block (i.e., preceeds brNode),
+    // If node is in the same basic block (i.e., precedes brNode),
      // remove it and all its incident edges from the graph.  Make sure we
      // add dummy edges for pred/succ nodes that become entry/exit nodes.
      graph->eraseIncidentEdges(node, /*addDummyEdges*/ true);
@@ -1111,7 +1100,7 @@ FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
  {
    const TargetInstrInfo& mii = S.getInstrInfo();
    unsigned ndelays =
-    mii.getNumDelaySlots(brNode->getOpCode());
+    mii.getNumDelaySlots(brNode->getOpcode());
    
    if (ndelays == 0)
      return;
@@ -1126,10 +1115,10 @@ FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
    for (sg_pred_iterator P = pred_begin(brNode);
         P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
      if (! (*P)->isDummyNode() &&
-       ! mii.isNop((*P)->getOpCode()) &&
+       ! mii.isNop((*P)->getOpcode()) &&
         NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
      {
-      if (mii.maxLatency((*P)->getOpCode()) > 1)
+      if (mii.maxLatency((*P)->getOpcode()) > 1)
          mdelayNodeVec.push_back(*P);
        else
          sdelayNodeVec.push_back(*P);
@@ -1142,12 +1131,12 @@ FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
    // 
    while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) {
      unsigned lmin =
-      mii.maxLatency(mdelayNodeVec[0]->getOpCode());
+      mii.maxLatency(mdelayNodeVec[0]->getOpcode());
      unsigned minIndex   = 0;
      for (unsigned i=1; i < mdelayNodeVec.size(); i++)
      {
        unsigned li = 
-        mii.maxLatency(mdelayNodeVec[i]->getOpCode());
+        mii.maxLatency(mdelayNodeVec[i]->getOpcode());
        if (lmin >= li)
        {
          lmin = li;
@@ -1175,7 +1164,7 @@ static void ReplaceNopsWithUsefulInstr(SchedulingManager& S,
    std::vector<SchedGraphNode*> nopNodeVec;   // this will hold unused NOPs
    const TargetInstrInfo& mii = S.getInstrInfo();
    const MachineInstr* brInstr = node->getMachineInstr();
-  unsigned ndelays= mii.getNumDelaySlots(brInstr->getOpCode());
+  unsigned ndelays= mii.getNumDelaySlots(brInstr->getOpcode());
    assert(ndelays > 0 && "Unnecessary call to replace NOPs");
    
    // Remove the NOPs currently in delay slots from the graph.
@@ -1184,25 +1173,31 @@ static void ReplaceNopsWithUsefulInstr(SchedulingManager& S,
    //  
    unsigned int firstDelaySlotIdx = node->getOrigIndexInBB() + 1;
    MachineBasicBlock& MBB = node->getMachineBasicBlock();
-  assert(MBB[firstDelaySlotIdx - 1] == brInstr &&
-         "Incorrect instr. index in basic block for brInstr");
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  std::advance(MBBI, firstDelaySlotIdx - 1);
+  if (!(&*MBBI++ == brInstr)) {
+    std::cerr << "Incorrect instr. index in basic block for brInstr";
+    abort();
+  }
    
    // First find all useful instructions already in the delay slots
    // and USE THEM.  We'll throw away the unused alternatives below
    // 
-  for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx + ndelays; ++i)
-    if (! mii.isNop(MBB[i]->getOpCode()))
+  MachineBasicBlock::iterator Tmp = MBBI;
+  for (unsigned i = 0; i != ndelays; ++i, ++MBBI)
+    if (!mii.isNop(MBBI->getOpcode()))
        sdelayNodeVec.insert(sdelayNodeVec.begin(),
-                           graph->getGraphNodeForInstr(MBB[i]));
-  
+                           graph->getGraphNodeForInstr(MBBI));
+  MBBI = Tmp;
+
    // Then find the NOPs and keep only as many as are needed.
    // Put the rest in nopNodeVec to be deleted.
-  for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx + ndelays; ++i)
-    if (mii.isNop(MBB[i]->getOpCode()))
+  for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx+ndelays; ++i, ++MBBI)
+    if (mii.isNop(MBBI->getOpcode()))
        if (sdelayNodeVec.size() < ndelays)
-        sdelayNodeVec.push_back(graph->getGraphNodeForInstr(MBB[i]));
+        sdelayNodeVec.push_back(graph->getGraphNodeForInstr(MBBI));
        else {
-        nopNodeVec.push_back(graph->getGraphNodeForInstr(MBB[i]));
+        nopNodeVec.push_back(graph->getGraphNodeForInstr(MBBI));
           
          //remove the MI from the Machine Code For Instruction
          const TerminatorInst *TI = MBB.getBasicBlock()->getTerminator();
@@ -1211,7 +1206,7 @@ static void ReplaceNopsWithUsefulInstr(SchedulingManager& S,
            
          for(MachineCodeForInstruction::iterator mciI=llvmMvec.begin(), 
                mciE=llvmMvec.end(); mciI!=mciE; ++mciI){
-          if (*mciI==MBB[i])
+          if (*mciI == MBBI)
              llvmMvec.erase(mciI);
          }
        }
@@ -1255,7 +1250,8 @@ ChooseInstructionsForDelaySlots(SchedulingManager& S, MachineBasicBlock &MBB,
    std::vector<SchedGraphNode*> delayNodeVec;
    const MachineInstr* brInstr = NULL;
    
-  if (termInstr->getOpcode() != Instruction::Ret)
+  if (EnableFillingDelaySlots &&
+      termInstr->getOpcode() != Instruction::Ret)
    {
      // To find instructions that need delay slots without searching the full
      // machine code, we assume that the only delayed instructions are CALLs
@@ -1264,7 +1260,7 @@ ChooseInstructionsForDelaySlots(SchedulingManager& S, MachineBasicBlock &MBB,
      // 
      unsigned first = 0;
      while (first < termMvec.size() &&
-           ! mii.isBranch(termMvec[first]->getOpCode()))
+           ! mii.isBranch(termMvec[first]->getOpcode()))
      {
        ++first;
      }
@@ -1285,13 +1281,13 @@ ChooseInstructionsForDelaySlots(SchedulingManager& S, MachineBasicBlock &MBB,
    
    // Also mark delay slots for other delayed instructions to hold NOPs. 
    // Simply passing in an empty delayNodeVec will have this effect.
+  // If brInstr is not handled above (EnableFillingDelaySlots == false),
+  // brInstr will be NULL so this will handle the branch instrs. as well.
    // 
    delayNodeVec.clear();
-  for (unsigned i=0; i < MBB.size(); ++i)
-    if (MBB[i] != brInstr &&
-        mii.getNumDelaySlots(MBB[i]->getOpCode()) > 0)
-    {
-      SchedGraphNode* node = graph->getGraphNodeForInstr(MBB[i]);
+  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I)
+    if (I != brInstr && mii.getNumDelaySlots(I->getOpcode()) > 0) {
+      SchedGraphNode* node = graph->getGraphNodeForInstr(I);
        ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph);
      }
  }
@@ -1327,12 +1323,8 @@ DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
      for (unsigned i=0; i < delayNodeVec.size(); i++) {
        const SchedGraphNode* dnode = delayNodeVec[i];
        if ( ! S.isScheduled(dnode)
-           && S.schedInfo.instrCanUseSlot(dnode->getOpCode(), nextSlot)
-           && instrIsFeasible(S, dnode->getOpCode()))
-      {
-        assert(S.getInstrInfo().hasOperandInterlock(dnode->getOpCode())
-               && "Instructions without interlocks not yet supported "
-               "when filling branch delay slots");
+           && S.schedInfo.instrCanUseSlot(dnode->getOpcode(), nextSlot)
+           && instrIsFeasible(S, dnode->getOpcode())) {
          S.scheduleInstr(dnode, nextSlot, nextTime);
          break;
        }
@@ -1502,3 +1494,6 @@ bool InstructionSchedulingWithSSA::runOnFunction(Function &F)
  FunctionPass *createInstructionSchedulingWithSSAPass(const TargetMachine &tgt) {
    return new InstructionSchedulingWithSSA(tgt);
  }
+
+} // End llvm namespace
+