Use the schedule itinerary operand use/def cycle information to adjust dependence...

author David Goodwin <david_goodwin@apple.com>

Wed, 19 Aug 2009 16:08:58 +0000 (16:08 +0000)

committer David Goodwin <david_goodwin@apple.com>

Wed, 19 Aug 2009 16:08:58 +0000 (16:08 +0000)
author David Goodwin <david_goodwin@apple.com>
Wed, 19 Aug 2009 16:08:58 +0000 (16:08 +0000)
committer David Goodwin <david_goodwin@apple.com>
Wed, 19 Aug 2009 16:08:58 +0000 (16:08 +0000)
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h

index ead3d3db264b717b29288864825920f8b6945cf1..f820200f99bd0a0a3c0b0edadc858db099f2e75d 100644 (file)
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -495,6 +495,12 @@ namespace llvm {
      ///
      virtual void ComputeLatency(SUnit *SU) = 0;
  
+    /// ComputeOperandLatency - Override dependence edge latency using operand
+    /// use/def information. The default implementation does nothing.
+    ///
+    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+                                       SDep& dep) const { };
+
      /// Schedule - Order nodes according to selected style, filling
      /// in the Sequence member.
      ///
diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h

index 9ba9cb61c5b971029bdb39cf3d1eaaabb5aa83e4..0e4ca985ddd8b8c9f0e125f15b9534cf50aa4f33 100644 (file)
--- a/include/llvm/Target/TargetInstrItineraries.h
+++ b/include/llvm/Target/TargetInstrItineraries.h
@@ -103,7 +103,7 @@ struct InstrItineraryData {
    /// isEmpty - Returns true if there are no itineraries.
    ///
    bool isEmpty() const { return Itineratries == 0; }
-  
+
    /// beginStage - Return the first stage of the itinerary.
    /// 
    const InstrStage *beginStage(unsigned ItinClassIndx) const {
@@ -118,20 +118,17 @@ struct InstrItineraryData {
      return Stages + StageIdx;
    }
  
-  /// getLatency - Return the scheduling latency of the given class.  A
-  /// simple latency value for an instruction is an over-simplification
-  /// for some architectures, but it's a reasonable first approximation.
+  /// getStageLatency - Return the total stage latency of the given
+  /// class.  The latency is the maximum completion time for any stage
+  /// in the itinerary.
    ///
-  unsigned getLatency(unsigned ItinClassIndx) const {
-    // If the target doesn't provide latency information, use a simple
-    // non-zero default value for all instructions.
+  unsigned getStageLatency(unsigned ItinClassIndx) const {
+    // If the target doesn't provide itinerary information, use a
+    // simple non-zero default value for all instructions.
      if (isEmpty())
        return 1;
  
-    // Caclulate the maximum completion time for any stage. The
-    // assumption is that all inputs are consumed at the start of the
-    // first stage and that all outputs are produced at the end of the
-    // latest completing last stage.
+    // Calculate the maximum completion time for any stage.
      unsigned Latency = 0, StartCycle = 0;
      for (const InstrStage *IS = beginStage(ItinClassIndx),
             *E = endStage(ItinClassIndx); IS != E; ++IS) {
@@ -141,6 +138,21 @@ struct InstrItineraryData {
  
      return Latency;
    }
+
+  /// getOperandCycle - Return the cycle for the given class and operand.
+  /// Return -1 if there are no itineraries or no cycle for the operand.
+  ///
+  int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const {
+    if (isEmpty())
+      return -1;
+
+    unsigned FirstIdx = Itineratries[ItinClassIndx].FirstOperandCycle;
+    unsigned LastIdx = Itineratries[ItinClassIndx].LastOperandCycle;
+    if ((FirstIdx + OperandIdx) >= LastIdx)
+      return -1;
+
+    return (int)OperandCycles[FirstIdx + OperandIdx];
+  }
  };
  
  
diff --git a/include/llvm/Target/TargetSubtarget.h b/include/llvm/Target/TargetSubtarget.h

index c86e81554ced659d53670a9ac51c8970b66a158b..14f612af979408d9a15b5df0a098907775087a6b 100644 (file)
--- a/include/llvm/Target/TargetSubtarget.h
+++ b/include/llvm/Target/TargetSubtarget.h
@@ -17,6 +17,7 @@
  namespace llvm {
  
  class SDep;
+class SUnit;
  
  //===----------------------------------------------------------------------===//
  ///
@@ -40,7 +41,8 @@ public:
  
    // adjustSchedDependency - Perform target specific adjustments to
    // the latency of a schedule dependency.
-  virtual void adjustSchedDependency(SDep&) const { };
+  virtual void adjustSchedDependency(SUnit *def, SUnit *use, 
+                                     SDep& dep) const { };
  };
  
  } // End llvm namespace
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp

index c5ee7ac2d4520468c3d36770bf9864340dd2d514..1aceda5ea3a9f721fb5eab6f516fb18a7fceee9f 100644 (file)
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -210,6 +210,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
              // Optionally add in a special extra latency for nodes that
              // feed addresses.
              // TODO: Do this for register aliases too.
+            // TODO: Perhaps we should get rid of
+            // SpecialAddressLatency and just move this into
+            // adjustSchedDependency for the targets that care about
+            // it.
              if (SpecialAddressLatency != 0 && !UnitLatencies) {
                MachineInstr *UseMI = UseSU->getInstr();
                const TargetInstrDesc &UseTID = UseMI->getDesc();
@@ -220,8 +224,14 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
                    UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
                  LDataLatency += SpecialAddressLatency;
              }
+            // Adjust the dependence latency using operand def/use
+            // information (if any), and then allow the target to
+            // perform its own adjustments.
              const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
-            ST.adjustSchedDependency((SDep &)dep);
+            if (!UnitLatencies) {
+              ComputeOperandLatency(SU, UseSU, (SDep &)dep);
+              ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
+            }
              UseSU->addPred(dep);
            }
          }
@@ -231,7 +241,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
              SUnit *UseSU = UseList[i];
              if (UseSU != SU) {
                const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
-              ST.adjustSchedDependency((SDep &)dep);
+              if (!UnitLatencies) {
+                ComputeOperandLatency(SU, UseSU, (SDep &)dep);
+                ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
+              }
                UseSU->addPred(dep);
              }
            }
@@ -410,7 +423,7 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
  
    // Compute the latency for the node.
    SU->Latency =
-    InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass());
+    InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());
  
    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
@@ -419,6 +432,50 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
        SU->Latency += 2;
  }
  
+void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, 
+                                              SDep& dep) const {
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  if (InstrItins.isEmpty())
+    return;
+  
+  // Only a data dependency on a known (non-zero) register is adjusted...
+  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
+    return;
+
+  const unsigned Reg = dep.getReg();
+
+  // ... find the operand defining the register in the defining
+  // instruction (nothing is changed when DefIdx is -1)
+  MachineInstr *DefMI = Def->getInstr();
+  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+  if (DefIdx != -1) {
+    int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx);
+    if (DefCycle >= 0) {
+      MachineInstr *UseMI = Use->getInstr();
+      const unsigned UseClass = UseMI->getDesc().getSchedClass();
+
+      // For all uses of the register, calculate the maximum latency
+      int Latency = -1;
+      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+        const MachineOperand &MO = UseMI->getOperand(i);
+        if (!MO.isReg() || !MO.isUse())
+          continue;
+        unsigned MOReg = MO.getReg();
+        if (MOReg != Reg)
+          continue;
+
+        int UseCycle = InstrItins.getOperandCycle(UseClass, i);
+        if (UseCycle >= 0)
+          Latency = std::max(Latency, DefCycle - UseCycle + 1);
+      }
+
+      // If a non-negative latency was found, replace the dependence latency.
+      if (Latency >= 0)
+        dep.setLatency(Latency);
+    }
+  }
+}
+
  void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
    SU->getInstr()->dump();
  }
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h

index 00d6268d1a14a327e548b48bba97bb035b17a83d..929bdaa4b178700244bb75434ae716124ad9efc0 100644 (file)
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -160,6 +160,12 @@ namespace llvm {
      ///
      virtual void ComputeLatency(SUnit *SU);
  
+    /// ComputeOperandLatency - Override dependence edge latency using the
+    /// operand use/def cycles from the instruction itinerary data.
+    ///
+    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+                                       SDep& dep) const;
+
      virtual MachineBasicBlock *EmitSchedule();
  
      /// StartBlock - Prepare to perform scheduling in the given block.
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp

index ca4ba565d40c843c65436ce8a5e2cc4751e035f2..a580b93c836a7925b5b50d8a221443d85ca17af1 100644 (file)
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -155,6 +155,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
  void ScheduleDAGSDNodes::AddSchedEdges() {
    const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
  
+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
    // Pass 2: add the preds, succs, etc.
    for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
      SUnit *SU = &SUnits[su];
@@ -212,8 +215,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
  
          const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                                 OpSU->Latency, PhysReg);
-        if (!isChain)
-          ST.adjustSchedDependency((SDep &)dep);
+        if (!isChain && !UnitLatencies) {
+          ComputeOperandLatency(OpSU, SU, (SDep &)dep);
+          ST.adjustSchedDependency(OpSU, SU, (SDep &)dep);
+        }
  
          SU->addPred(dep);
        }
@@ -242,8 +247,8 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
      if (N->isMachineOpcode()) {
        SawMachineOpcode = true;
-      SU->Latency +=
-        InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass());
+      SU->Latency += InstrItins.
+        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
      }
  }
author	David Goodwin <david_goodwin@apple.com>
	Wed, 19 Aug 2009 16:08:58 +0000 (16:08 +0000)
committer	David Goodwin <david_goodwin@apple.com>
	Wed, 19 Aug 2009 16:08:58 +0000 (16:08 +0000)
include/llvm/CodeGen/ScheduleDAG.h		patch \| blob \| history
include/llvm/Target/TargetInstrItineraries.h		patch \| blob \| history
include/llvm/Target/TargetSubtarget.h		patch \| blob \| history
lib/CodeGen/ScheduleDAGInstrs.cpp		patch \| blob \| history
lib/CodeGen/ScheduleDAGInstrs.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp		patch \| blob \| history