Expand small memmovs using inline code. Set the X86 threshold for expanding

[oota-llvm.git] / lib / CodeGen / SelectionDAG / ScheduleDAGRRList.cpp
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp

index cef3eafc5c5e558ebc74bbe8c68d0cdc69984bee..f84b3f0557795d63c3c2e41fcffc9a608caca0b7 100644 (file)
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -27,6 +27,7 @@
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
  #include <climits>
  #include <queue>
  #include "llvm/Support/CommandLine.h"
@@ -239,7 +240,8 @@ void ScheduleDAGRRList::Schedule() {
  /// possible. It will be commuted when it is translated to a MI.
  void ScheduleDAGRRList::CommuteNodesToReducePressure() {
    SmallPtrSet<SUnit*, 4> OperandSeen;
-  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
+  for (unsigned i = Sequence.size(); i != 0; ) {
+    --i;
      SUnit *SU = Sequence[i];
      if (!SU || !SU->Node) continue;
      if (SU->isCommutable) {
@@ -311,11 +313,8 @@ void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
  #endif
    
    if (PredSU->NumSuccsLeft == 0) {
-    // EntryToken has to go last!  Special case it here.
-    if (!PredSU->Node || PredSU->Node->getOpcode() != ISD::EntryToken) {
-      PredSU->isAvailable = true;
-      AvailableQueue->push(PredSU);
-    }
+    PredSU->isAvailable = true;
+    AvailableQueue->push(PredSU);
    }
  }
  
@@ -365,14 +364,14 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  /// CapturePred - This does the opposite of ReleasePred. Since SU is being
  /// unscheduled, incrcease the succ left count of its predecessors. Remove
  /// them from AvailableQueue if necessary.
-void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) {
-  PredSU->CycleBound = 0;
+void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) {  
+  unsigned CycleBound = 0;
    for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end();
         I != E; ++I) {
      if (I->Dep == SU)
        continue;
-    PredSU->CycleBound = std::max(PredSU->CycleBound,
-                                  I->Dep->Cycle + PredSU->Latency);
+    CycleBound = std::max(CycleBound,
+                          I->Dep->Cycle + PredSU->Latency);
    }
  
    if (PredSU->isAvailable) {
@@ -381,6 +380,7 @@ void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) {
        AvailableQueue->remove(PredSU);
    }
  
+  PredSU->CycleBound = CycleBound;
    ++PredSU->NumSuccsLeft;
  }
  
@@ -735,9 +735,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
                                   I->isCtrl, I->isSpecial));
      }
  
-    RemovePred(SU, ChainPred, true, false);
-    if (isNewLoad) {
-      AddPred(LoadSU,ChainPred, true, false);
+    if (ChainPred) {
+      RemovePred(SU, ChainPred, true, false);
+      if (isNewLoad)
+        AddPred(LoadSU, ChainPred, true, false);
      }
      for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
        SDep *Pred = &LoadPreds[i];
@@ -941,9 +942,12 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
  void ScheduleDAGRRList::ListScheduleBottomUp() {
    unsigned CurCycle = 0;
    // Add root to Available queue.
-  SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front();
-  RootSU->isAvailable = true;
-  AvailableQueue->push(RootSU);
+  if (!SUnits.empty()) {
+    SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front();
+    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+    RootSU->isAvailable = true;
+    AvailableQueue->push(RootSU);
+  }
  
    // While Available queue is not empty, grab the node with the highest
    // priority. If it is not ready put it back.  Schedule the node.
@@ -1066,12 +1070,6 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
      ++CurCycle;
    }
  
-  // Add entry node last
-  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
-    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
-    Sequence.push_back(Entry);
-  }
-
    // Reverse the order if it is bottom up.
    std::reverse(Sequence.begin(), Sequence.end());
    
@@ -1079,16 +1077,34 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
  #ifndef NDEBUG
    // Verify that all SUnits were scheduled.
    bool AnyNotSched = false;
+  unsigned DeadNodes = 0;
+  unsigned Noops = 0;
    for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
-    if (SUnits[i].NumSuccsLeft != 0) {
+    if (!SUnits[i].isScheduled) {
+      if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+        ++DeadNodes;
+        continue;
+      }
        if (!AnyNotSched)
          cerr << "*** List scheduling failed! ***\n";
        SUnits[i].dump(&DAG);
        cerr << "has not been scheduled!\n";
        AnyNotSched = true;
      }
+    if (SUnits[i].NumSuccsLeft != 0) {
+      if (!AnyNotSched)
+        cerr << "*** List scheduling failed! ***\n";
+      SUnits[i].dump(&DAG);
+      cerr << "has successors left!\n";
+      AnyNotSched = true;
+    }
    }
+  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+    if (!Sequence[i])
+      ++Noops;
    assert(!AnyNotSched);
+  assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
+         "The number of nodes scheduled doesn't match the expected number!");
  #endif
  }
  
@@ -1145,22 +1161,16 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  /// schedulers.
  void ScheduleDAGRRList::ListScheduleTopDown() {
    unsigned CurCycle = 0;
-  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
  
    // All leaves to Available queue.
    for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
      // It is available if it has no predecessors.
-    if (SUnits[i].Preds.empty() && &SUnits[i] != Entry) {
+    if (SUnits[i].Preds.empty()) {
        AvailableQueue->push(&SUnits[i]);
        SUnits[i].isAvailable = true;
      }
    }
    
-  // Emit the entry node first.
-  ScheduleNodeTopDown(Entry, CurCycle);
-  Sequence.push_back(Entry);
-  ++CurCycle;
-
    // While Available queue is not empty, grab the node with the highest
    // priority. If it is not ready put it back.  Schedule the node.
    std::vector<SUnit*> NotReady;
@@ -1181,23 +1191,41 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
        ScheduleNodeTopDown(CurSU, CurCycle);
        Sequence.push_back(CurSU);
      }
-    CurCycle++;
+    ++CurCycle;
    }
    
    
  #ifndef NDEBUG
    // Verify that all SUnits were scheduled.
    bool AnyNotSched = false;
+  unsigned DeadNodes = 0;
+  unsigned Noops = 0;
    for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
      if (!SUnits[i].isScheduled) {
+      if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+        ++DeadNodes;
+        continue;
+      }
        if (!AnyNotSched)
          cerr << "*** List scheduling failed! ***\n";
        SUnits[i].dump(&DAG);
        cerr << "has not been scheduled!\n";
        AnyNotSched = true;
      }
+    if (SUnits[i].NumPredsLeft != 0) {
+      if (!AnyNotSched)
+        cerr << "*** List scheduling failed! ***\n";
+      SUnits[i].dump(&DAG);
+      cerr << "has predecessors left!\n";
+      AnyNotSched = true;
+    }
    }
+  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+    if (!Sequence[i])
+      ++Noops;
    assert(!AnyNotSched);
+  assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
+         "The number of nodes scheduled doesn't match the expected number!");
  #endif
  }
  
@@ -1242,11 +1270,12 @@ namespace {
    template<class SF>
    class VISIBILITY_HIDDEN RegReductionPriorityQueue
     : public SchedulingPriorityQueue {
-    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;
+    std::set<SUnit*, SF> Queue;
+    unsigned currentQueueId;
  
    public:
      RegReductionPriorityQueue() :
-    Queue(SF(this)) {}
+    Queue(SF(this)), currentQueueId(0) {}
      
      virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                             std::vector<SUnit> &sunits) {}
@@ -1266,39 +1295,32 @@ namespace {
      bool empty() const { return Queue.empty(); }
      
      void push(SUnit *U) {
-      Queue.push(U);
+      assert(!U->NodeQueueId && "Node in the queue already");
+      U->NodeQueueId = ++currentQueueId;
+      Queue.insert(U);
      }
+
      void push_all(const std::vector<SUnit *> &Nodes) {
        for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
-        Queue.push(Nodes[i]);
+        push(Nodes[i]);
      }
      
      SUnit *pop() {
        if (empty()) return NULL;
-      SUnit *V = Queue.top();
-      Queue.pop();
+      typename std::set<SUnit*, SF>::iterator i = prior(Queue.end());
+      SUnit *V = *i;
+      Queue.erase(i);
+      V->NodeQueueId = 0;
        return V;
      }
  
-    /// remove - This is a really inefficient way to remove a node from a
-    /// priority queue.  We should roll our own heap to make this better or
-    /// something.
      void remove(SUnit *SU) {
-      std::vector<SUnit*> Temp;
-      
-      assert(!Queue.empty() && "Not in queue!");
-      while (Queue.top() != SU) {
-        Temp.push_back(Queue.top());
-        Queue.pop();
-        assert(!Queue.empty() && "Not in queue!");
-      }
-
-      // Remove the node from the PQ.
-      Queue.pop();
-      
-      // Add all the other nodes back.
-      for (unsigned i = 0, e = Temp.size(); i != e; ++i)
-        Queue.push(Temp[i]);
+      assert(!Queue.empty() && "Queue is empty!");
+      size_t RemovedNum = Queue.erase(SU);
+      RemovedNum = RemovedNum; // Silence compiler warning.
+      assert(RemovedNum > 0 && "Not in queue!");
+      assert(RemovedNum == 1 && "Multiple times in the queue!");
+      SU->NodeQueueId = 0;
      }
    };
  
@@ -1478,18 +1500,6 @@ static unsigned calcMaxScratches(const SUnit *SU) {
  
  // Bottom up
  bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
-  // There used to be a special tie breaker here that looked for
-  // two-address instructions and preferred the instruction with a
-  // def&use operand.  The special case triggered diagnostics when
-  // _GLIBCXX_DEBUG was enabled because it broke the strict weak
-  // ordering that priority_queue requires. It didn't help much anyway
-  // because AddPseudoTwoAddrDeps already covers many of the cases
-  // where it would have applied.  In addition, it's counter-intuitive
-  // that a tie breaker would be the first thing attempted.  There's a
-  // "real" tie breaker below that is the operation of last resort.
-  // The fact that the "special tie breaker" would trigger when there
-  // wasn't otherwise a tie is what broke the strict weak ordering
-  // constraint.
  
    unsigned LPriority = SPQ->getNodePriority(left);
    unsigned RPriority = SPQ->getNodePriority(right);
@@ -1536,8 +1546,9 @@ bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
    if (left->CycleBound != right->CycleBound)
      return left->CycleBound > right->CycleBound;
  
-  // FIXME: No strict ordering.
-  return false;
+  assert(left->NodeQueueId && right->NodeQueueId && 
+         "NodeQueueId cannot be zero");
+  return (left->NodeQueueId > right->NodeQueueId);
  }
  
  template<class SF> bool
@@ -1708,26 +1719,9 @@ void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
      CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
  }
  
-static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
-  unsigned Sum = 0;
-  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-       I != E; ++I) {
-    SUnit *SuccSU = I->Dep;
-    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
-         EE = SuccSU->Preds.end(); II != EE; ++II) {
-      SUnit *PredSU = II->Dep;
-      if (!PredSU->isScheduled)
-        ++Sum;
-    }
-  }
-
-  return Sum;
-}
-
  /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
  /// predecessors of the successors of the SUnit SU. Stop when the provided
  /// limit is exceeded.
-
  static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 
                                                      unsigned Limit) {
    unsigned Sum = 0;
@@ -1737,11 +1731,9 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
      for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
           EE = SuccSU->Preds.end(); II != EE; ++II) {
        SUnit *PredSU = II->Dep;
-      if (!PredSU->isScheduled) {
-        ++Sum;
-        if(Sum > Limit)
-            return Sum;
-      }
+      if (!PredSU->isScheduled)
+        if (++Sum > Limit)
+          return Sum;
      }
    }
    return Sum;
@@ -1785,8 +1777,9 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
    if (left->CycleBound != right->CycleBound)
      return left->CycleBound > right->CycleBound;
  
-  // FIXME: No strict ordering.
-  return false;
+  assert(left->NodeQueueId && right->NodeQueueId && 
+         "NodeQueueId cannot be zero");
+  return (left->NodeQueueId > right->NodeQueueId);
  }
  
  /// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.