Unbreak build. Evan, please make sure my changes are correct.

[oota-llvm.git] / lib / CodeGen / SelectionDAG / ScheduleDAGRRList.cpp
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp

index 8e449971db333b96d61760d8bf75d79fa86c33a0..26da246c412af2f72ceb381ab0229f5691662946 100644 (file)
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -25,6 +25,7 @@
  #include "llvm/Target/TargetInstrInfo.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
  #include "llvm/ADT/PriorityQueue.h"
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/ADT/Statistic.h"
@@ -202,7 +203,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
      cerr << "*** Scheduling failed! ***\n";
      PredSU->dump(this);
      cerr << " has been released too many times!\n";
-    assert(0);
+    llvm_unreachable(0);
    }
  #endif
    
@@ -351,7 +352,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
    SUnit *NewSU;
    bool TryUnfold = false;
    for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
-    MVT VT = N->getValueType(i);
+    EVT VT = N->getValueType(i);
      if (VT == MVT::Flag)
        return NULL;
      else if (VT == MVT::Other)
@@ -359,7 +360,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
    }
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      const SDValue &Op = N->getOperand(i);
-    MVT VT = Op.getNode()->getValueType(Op.getResNo());
+    EVT VT = Op.getNode()->getValueType(Op.getResNo());
      if (VT == MVT::Flag)
        return NULL;
    }
@@ -410,6 +411,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
        NewSU->isCommutable = true;
      ComputeLatency(NewSU);
  
+    // Record all the edges to and from the old SU, by category.
      SmallVector<SDep, 4> ChainPreds;
      SmallVector<SDep, 4> ChainSuccs;
      SmallVector<SDep, 4> LoadPreds;
@@ -433,6 +435,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
          NodeSuccs.push_back(*I);
      }
  
+    // Now assign edges to the newly-created nodes.
      for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
        const SDep &Pred = ChainPreds[i];
        RemovePred(SU, Pred);
@@ -468,9 +471,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
          AddPred(SuccDep, D);
        }
      } 
-    if (isNewLoad) {
-      AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
-    }
+
+    // Add a data dependency to reflect that NewSU reads the value defined
+    // by LoadSU.
+    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
  
      if (isNewLoad)
        AvailableQueue->addNode(LoadSU);
@@ -567,7 +571,7 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
  /// getPhysicalRegisterVT - Returns the ValueType of the physical register
  /// definition of the specified node.
  /// FIXME: Move to SelectionDAG?
-static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                   const TargetInstrInfo *TII) {
    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
@@ -750,7 +754,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
          assert(LRegs.size() == 1 && "Can't handle this yet!");
          unsigned Reg = LRegs[0];
          SUnit *LRDef = LiveRegDefs[Reg];
-        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
          const TargetRegisterClass *RC =
            TRI->getPhysicalRegisterRegClass(Reg, VT);
          const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
@@ -826,7 +830,7 @@ void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
      cerr << "*** Scheduling failed! ***\n";
      SuccSU->dump(this);
      cerr << " has been released too many times!\n";
-    assert(0);
+    llvm_unreachable(0);
    }
  #endif
    
@@ -985,6 +989,8 @@ namespace {
        SUnits = &sunits;
        // Add pseudo dependency edges for two-address nodes.
        AddPseudoTwoAddrDeps();
+      // Reroute edges to nodes with multiple uses.
+      PrescheduleNodesWithMultipleUses();
        // Calculate node priorities.
        CalculateSethiUllmanNumbers();
      }
@@ -1014,9 +1020,10 @@ namespace {
          // avoid spilling.
          return 0;
        if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+          Opc == TargetInstrInfo::SUBREG_TO_REG ||
            Opc == TargetInstrInfo::INSERT_SUBREG)
-        // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to
-        // facilitate coalescing.
+        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+        // close to their uses to facilitate coalescing.
          return 0;
        if (SU->NumSuccs == 0 && SU->NumPreds != 0)
          // If SU does not have a register use, i.e. it doesn't produce a value
@@ -1069,6 +1076,7 @@ namespace {
    protected:
      bool canClobber(const SUnit *SU, const SUnit *Op);
      void AddPseudoTwoAddrDeps();
+    void PrescheduleNodesWithMultipleUses();
      void CalculateSethiUllmanNumbers();
    };
  
@@ -1208,7 +1216,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
      if (!SUImpDefs)
        return false;
      for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
-      MVT VT = N->getValueType(i);
+      EVT VT = N->getValueType(i);
        if (VT == MVT::Flag || VT == MVT::Other)
          continue;
        if (!N->hasAnyUseOfValue(i))
@@ -1224,6 +1232,123 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
    return false;
  }
  
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+///      N
+///    / |
+///   /  |
+///  U  store
+///  |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+///      N
+///      ||
+///      ||
+///     store
+///       |
+///       U
+///       |
+///      ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+template<class SF>
+void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+  // Visit all the nodes in topological order, working top-down.
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = &(*SUnits)[i];
+    // For now, only look at nodes with no data successors, such as stores.
+    // These are especially important, due to the heuristics in
+    // getNodePriority for nodes with no data successors.
+    if (SU->NumSuccs != 0)
+      continue;
+    // For now, only look at nodes with exactly one data predecessor.
+    if (SU->NumPreds != 1)
+      continue;
+    // Avoid prescheduling copies to virtual registers, which don't behave
+    // like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU->getNode())
+      if (N->getOpcode() == ISD::CopyToReg &&
+          TargetRegisterInfo::isVirtualRegister
+            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Locate the single data predecessor (the first non-control pred edge).
+    SUnit *PredSU = 0;
+    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+         EE = SU->Preds.end(); II != EE; ++II)
+      if (!II->isCtrl()) {
+        PredSU = II->getSUnit();
+        break;
+      }
+    assert(PredSU);  // NumPreds == 1 was checked above, so one should exist.
+
+    // Don't rewrite edges that carry physregs, because that requires additional
+    // support infrastructure.
+    if (PredSU->hasPhysRegDefs)
+      continue;
+    // Short-circuit the case where SU is PredSU's only data successor.
+    if (PredSU->NumSuccs == 1)
+      continue;
+    // Avoid prescheduling to copies from virtual registers, which don't behave
+    // like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU->getNode())  // NOTE(review): tests SU, not PredSU; confirm intent.
+      if (N->getOpcode() == ISD::CopyFromReg &&
+          TargetRegisterInfo::isVirtualRegister
+            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Perform checks on the successors of PredSU.
+    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+         EE = PredSU->Succs.end(); II != EE; ++II) {
+      SUnit *PredSuccSU = II->getSUnit();
+      if (PredSuccSU == SU) continue;
+      // If PredSU has another successor with no data successors, for
+      // now don't attempt to choose either over the other.
+      if (PredSuccSU->NumSuccs == 0)
+        goto outer_loop_continue;
+      // Don't break physical register dependencies.
+      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+          goto outer_loop_continue;
+      // Don't introduce graph cycles.
+      if (scheduleDAG->IsReachable(SU, PredSuccSU))
+        goto outer_loop_continue;
+    }
+
+    // Ok, the transformation is safe and the heuristics suggest it is
+    // profitable. Update the graph.
+    DOUT << "Prescheduling SU # " << SU->NodeNum
+         << " next to PredSU # " << PredSU->NodeNum
+         << " to guide scheduling in the presence of multiple uses\n";
+    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+      SDep Edge = PredSU->Succs[i];  // Copied by value; modified below.
+      assert(!Edge.isAssignedRegDep());
+      SUnit *SuccSU = Edge.getSUnit();
+      if (SuccSU != SU) {
+        Edge.setSUnit(PredSU);  // Re-point the copy at PredSU for the calls below.
+        scheduleDAG->RemovePred(SuccSU, Edge);
+        scheduleDAG->AddPred(SU, Edge);  // SU takes over the dependence on PredSU.
+        Edge.setSUnit(SU);
+        scheduleDAG->AddPred(SuccSU, Edge);  // SuccSU now depends on SU instead.
+        --i;  // Presumably RemovePred dropped Succs[i]; re-examine this index.
+      }
+    }
+  outer_loop_continue:;  // Target of the gotos above; moves on to the next SU.
+  }
+}
+
  /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
  /// it as a def&use operand. Add a pseudo control edge from it to the other
  /// node (if it won't create a cycle) so the two-address one will be scheduled
@@ -1265,19 +1390,30 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
          if (SuccSU->getHeight() < SU->getHeight() &&
              (SU->getHeight() - SuccSU->getHeight()) > 1)
            continue;
+        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+        // constrains whatever is using the copy, instead of the copy
+        // itself. In the case that the copy is coalesced, this
+        // preserves the intent of the pseudo two-address heuristics.
+        while (SuccSU->Succs.size() == 1 &&
+               SuccSU->getNode()->isMachineOpcode() &&
+               SuccSU->getNode()->getMachineOpcode() ==
+                 TargetInstrInfo::COPY_TO_REGCLASS)
+          SuccSU = SuccSU->Succs.front().getSUnit();
+        // Don't constrain non-instruction nodes.
          if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
            continue;
          // Don't constrain nodes with physical register defs if the
          // predecessor can clobber them.
-        if (SuccSU->hasPhysRegDefs) {
+        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
            if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
              continue;
          }
-        // Don't constrain extract_subreg / insert_subreg; these may be
-        // coalesced away. We want them close to their uses.
+        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+        // these may be coalesced away. We want them close to their uses.
          unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
          if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
-            SuccOpc == TargetInstrInfo::INSERT_SUBREG)
+            SuccOpc == TargetInstrInfo::INSERT_SUBREG ||
+            SuccOpc == TargetInstrInfo::SUBREG_TO_REG)
            continue;
          if ((!canClobber(SuccSU, DUSU) ||
               (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
@@ -1370,7 +1506,7 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  //===----------------------------------------------------------------------===//
  
  llvm::ScheduleDAGSDNodes *
-llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, bool) {
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
    const TargetMachine &TM = IS->TM;
    const TargetInstrInfo *TII = TM.getInstrInfo();
    const TargetRegisterInfo *TRI = TM.getRegisterInfo();
@@ -1384,7 +1520,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, bool) {
  }
  
  llvm::ScheduleDAGSDNodes *
-llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, bool) {
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
    const TargetMachine &TM = IS->TM;
    const TargetInstrInfo *TII = TM.getInstrInfo();
    const TargetRegisterInfo *TRI = TM.getRegisterInfo();