bool isScheduleHigh : 1; // True if preferable to schedule high.
bool isScheduleLow : 1; // True if preferable to schedule low.
bool isCloned : 1; // True if this node has been cloned.
- bool isUnbuffered : 1; // Reads an unbuffered resource.
+ bool isUnbuffered : 1; // Uses an unbuffered resource.
+ bool hasReservedResource : 1; // Uses a reserved resource.
Sched::Preference SchedulingPref; // Scheduling preference.
private:
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
isAvailable(false), isScheduled(false), isScheduleHigh(false),
isScheduleLow(false), isCloned(false), isUnbuffered(false),
- SchedulingPref(Sched::None), isDepthCurrent(false),
- isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
- BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+ hasReservedResource(false), SchedulingPref(Sched::None),
+ isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+ TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
/// SUnit - Construct an SUnit for post-regalloc scheduling to represent
/// a MachineInstr.
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
isAvailable(false), isScheduled(false), isScheduleHigh(false),
isScheduleLow(false), isCloned(false), isUnbuffered(false),
- SchedulingPref(Sched::None), isDepthCurrent(false),
- isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
- BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+ hasReservedResource(false), SchedulingPref(Sched::None),
+ isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+ TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
/// SUnit - Construct a placeholder SUnit.
SUnit()
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
isAvailable(false), isScheduled(false), isScheduleHigh(false),
isScheduleLow(false), isCloned(false), isUnbuffered(false),
- SchedulingPref(Sched::None), isDepthCurrent(false),
- isHeightCurrent(false), Depth(0), Height(0), TopReadyCycle(0),
- BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
+ hasReservedResource(false), SchedulingPref(Sched::None),
+ isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
+ TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
/// \brief Boundary nodes are placeholders for the boundary of the
/// scheduling region.
// GenericScheduler - Implementation of the generic MachineSchedStrategy.
//===----------------------------------------------------------------------===//
+// Sentinel for ReservedCycles entries: marks a resource that has never been
+// reserved by a scheduled instruction in the current zone.
+static const unsigned InvalidCycle = ~0U;
+
namespace {
/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
// Is the scheduled region resource limited vs. latency limited.
bool IsResourceLimited;
+ // Record the highest cycle at which each resource has been reserved by a
+ // scheduled instruction.
+ SmallVector<unsigned, 16> ReservedCycles;
+
#ifndef NDEBUG
// Remember the greatest operand latency as an upper bound on the number of
// times we should retry the pending queue because of a hazard.
MaxExecutedResCount = 0;
ZoneCritResIdx = 0;
IsResourceLimited = false;
+ ReservedCycles.clear();
#ifndef NDEBUG
MaxObservedLatency = 0;
#endif
/// cycle.
unsigned getLatencyStallCycles(SUnit *SU);
+ unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
+
bool checkHazard(SUnit *SU);
unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
DAG = dag;
SchedModel = smodel;
Rem = rem;
- if (SchedModel->hasInstrSchedModel())
+ if (SchedModel->hasInstrSchedModel()) {
ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
+ ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+ }
}
/// Initialize the per-region scheduling policy.
return 0;
}
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled.
+///
+/// \param PIdx index of the processor resource kind being queried.
+/// \param Cycles number of cycles the current operation occupies the resource.
+/// \return cycle zero if the resource has never been reserved in this zone;
+/// otherwise the cycle recorded in ReservedCycles, biased by \p Cycles for
+/// bottom-up scheduling so the reservation covers this operation's own use.
+unsigned GenericScheduler::SchedBoundary::
+getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+ unsigned NextUnreserved = ReservedCycles[PIdx];
+ // If this resource has never been used, always return cycle zero.
+ if (NextUnreserved == InvalidCycle)
+ return 0;
+ // For bottom-up scheduling, add the cycles needed for the current operation.
+ if (!isTop())
+ NextUnreserved += Cycles;
+ return NextUnreserved;
+}
+
/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
<< SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
return true;
}
+ if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
+ return true;
+ }
+ }
return false;
}
/// \return the next cycle at which the instruction may execute without
/// oversubscribing resources.
unsigned GenericScheduler::SchedBoundary::
-countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
+countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
unsigned Factor = SchedModel->getResourceFactor(PIdx);
unsigned Count = Factor * Cycles;
DEBUG(dbgs() << " " << getResourceName(PIdx)
<< getResourceName(PIdx) << ": "
<< getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
}
- // TODO: We don't yet model reserved resources. It's not hard though.
- return CurrCycle;
+ // For reserved resources, record the highest cycle using the resource.
+ unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+ if (NextAvailable > CurrCycle) {
+ DEBUG(dbgs() << " Resource conflict: "
+ << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
+ << NextAvailable << "\n");
+ }
+ return NextAvailable;
}
/// Move the boundary of scheduled code by one SUnit.
}
HazardRec->EmitInstruction(SU);
}
+ // checkHazard should prevent scheduling multiple instructions per cycle that
+ // exceed the issue width.
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
- CurrMOps += IncMOps;
- // checkHazard prevents scheduling multiple instructions per cycle that exceed
- // issue width. However, we commonly reach the maximum. In this case
- // opportunistically bump the cycle to avoid uselessly checking everything in
- // the readyQ. Furthermore, a single instruction may produce more than one
- // cycle's worth of micro-ops.
- //
- // TODO: Also check if this SU must end a dispatch group.
- unsigned NextCycle = CurrCycle;
- if (CurrMOps >= SchedModel->getIssueWidth()) {
- ++NextCycle;
- DEBUG(dbgs() << " *** Max MOps " << CurrMOps
- << " at cycle " << CurrCycle << '\n');
- }
+ assert(CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth() &&
+ "Cannot scheduling this instructions MicroOps in the current cycle.");
+
unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
+ unsigned NextCycle = CurrCycle;
switch (SchedModel->getMicroOpBufferSize()) {
case 0:
assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned RCycle =
- countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
+ countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
if (RCycle > NextCycle)
NextCycle = RCycle;
}
+ if (SU->hasReservedResource) {
+ // For reserved resources, record the highest cycle using the resource.
+ // For top-down scheduling, this is the cycle in which we schedule this
+ // instruction plus the number of cycles the operation reserves the
+ // resource. For bottom-up it is simply the instruction's cycle.
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned PIdx = PI->ProcResourceIdx;
+ if (SchedModel->getProcResource(PIdx)->BufferSize == 0)
+ ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
+ }
+ }
}
// Update ExpectedLatency and DependentLatency.
unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
(int)(getCriticalCount() - (getScheduledLatency() * LFactor))
> (int)LFactor;
}
+ // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
+ // resets CurrMOps. Loop to handle instructions with more MOps than issue in
+ // one cycle. Since we commonly reach the max MOps here, opportunistically
+ // bump the cycle to avoid uselessly checking everything in the readyQ.
+ CurrMOps += IncMOps;
+ while (CurrMOps >= SchedModel->getIssueWidth()) {
+ bumpCycle(++NextCycle);
+ DEBUG(dbgs() << " *** Max MOps " << CurrMOps
+ << " at cycle " << CurrCycle << '\n');
+ }
DEBUG(dumpScheduledState());
}