From dc4bdcdef1c8dd1a28b82deb08df039e5c0ffc5a Mon Sep 17 00:00:00 2001
From: David Goodwin <david_goodwin@apple.com>
Date: Wed, 19 Aug 2009 16:08:58 +0000
Subject: [PATCH] Use the schedule itinerary operand use/def cycle information
 to adjust dependence edge latency for post-RA scheduling.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79425 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/ScheduleDAG.h            |  6 ++
 include/llvm/Target/TargetInstrItineraries.h  | 34 ++++++----
 include/llvm/Target/TargetSubtarget.h         |  4 +-
 lib/CodeGen/ScheduleDAGInstrs.cpp             | 63 ++++++++++++++++++-
 lib/CodeGen/ScheduleDAGInstrs.h               |  6 ++
 .../SelectionDAG/ScheduleDAGSDNodes.cpp       | 13 ++--
 6 files changed, 107 insertions(+), 19 deletions(-)

diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index ead3d3db264..f820200f99b 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -495,6 +495,12 @@ namespace llvm {
     ///
     virtual void ComputeLatency(SUnit *SU) = 0;
 
+    /// ComputeOperandLatency - Override dependence edge latency using
+    /// operand use/def information
+    ///
+    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+                                       SDep& dep) const { };
+
     /// Schedule - Order nodes according to selected style, filling
     /// in the Sequence member.
     ///
diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h
index 9ba9cb61c5b..0e4ca985ddd 100644
--- a/include/llvm/Target/TargetInstrItineraries.h
+++ b/include/llvm/Target/TargetInstrItineraries.h
@@ -103,7 +103,7 @@ struct InstrItineraryData {
   /// isEmpty - Returns true if there are no itineraries.
   ///
   bool isEmpty() const { return Itineratries == 0; }
-  
+
   /// beginStage - Return the first stage of the itinerary.
   /// 
   const InstrStage *beginStage(unsigned ItinClassIndx) const {
@@ -118,20 +118,17 @@ struct InstrItineraryData {
     return Stages + StageIdx;
   }
 
-  /// getLatency - Return the scheduling latency of the given class.  A
-  /// simple latency value for an instruction is an over-simplification
-  /// for some architectures, but it's a reasonable first approximation.
+  /// getStageLatency - Return the total stage latency of the given
+  /// class.  The latency is the maximum completion time for any stage
+  /// in the itinerary.
   ///
-  unsigned getLatency(unsigned ItinClassIndx) const {
-    // If the target doesn't provide latency information, use a simple
-    // non-zero default value for all instructions.
+  unsigned getStageLatency(unsigned ItinClassIndx) const {
+    // If the target doesn't provide itinerary information, use a
+    // simple non-zero default value for all instructions.
     if (isEmpty())
       return 1;
 
-    // Caclulate the maximum completion time for any stage. The
-    // assumption is that all inputs are consumed at the start of the
-    // first stage and that all outputs are produced at the end of the
-    // latest completing last stage.
+    // Calculate the maximum completion time for any stage.
     unsigned Latency = 0, StartCycle = 0;
     for (const InstrStage *IS = beginStage(ItinClassIndx),
            *E = endStage(ItinClassIndx); IS != E; ++IS) {
@@ -141,6 +138,21 @@ struct InstrItineraryData {
 
     return Latency;
   }
+
+  /// getOperandCycle - Return the cycle for the given class and
+  /// operand. Return -1 if no cycle is specified for the operand.
+  ///
+  int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const {
+    if (isEmpty())
+      return -1;
+
+    unsigned FirstIdx = Itineratries[ItinClassIndx].FirstOperandCycle;
+    unsigned LastIdx = Itineratries[ItinClassIndx].LastOperandCycle;
+    if ((FirstIdx + OperandIdx) >= LastIdx)
+      return -1;
+
+    return (int)OperandCycles[FirstIdx + OperandIdx];
+  }
 };
 
 
diff --git a/include/llvm/Target/TargetSubtarget.h b/include/llvm/Target/TargetSubtarget.h
index c86e81554ce..14f612af979 100644
--- a/include/llvm/Target/TargetSubtarget.h
+++ b/include/llvm/Target/TargetSubtarget.h
@@ -17,6 +17,7 @@
 namespace llvm {
 
 class SDep;
+class SUnit;
 
 //===----------------------------------------------------------------------===//
 ///
@@ -40,7 +41,8 @@ public:
 
   // adjustSchedDependency - Perform target specific adjustments to
   // the latency of a schedule dependency.
-  virtual void adjustSchedDependency(SDep&) const { };
+  virtual void adjustSchedDependency(SUnit *def, SUnit *use, 
+                                     SDep& dep) const { };
 };
 
 } // End llvm namespace
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index c5ee7ac2d45..1aceda5ea3a 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -210,6 +210,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
             // Optionally add in a special extra latency for nodes that
             // feed addresses.
             // TODO: Do this for register aliases too.
+            // TODO: Perhaps we should get rid of
+            // SpecialAddressLatency and just move this into
+            // adjustSchedDependency for the targets that care about
+            // it.
             if (SpecialAddressLatency != 0 && !UnitLatencies) {
               MachineInstr *UseMI = UseSU->getInstr();
               const TargetInstrDesc &UseTID = UseMI->getDesc();
@@ -220,8 +224,14 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
                   UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
                 LDataLatency += SpecialAddressLatency;
             }
+            // Adjust the dependence latency using operand def/use
+            // information (if any), and then allow the target to
+            // perform its own adjustments.
             const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
-            ST.adjustSchedDependency((SDep &)dep);
+            if (!UnitLatencies) {
+              ComputeOperandLatency(SU, UseSU, (SDep &)dep);
+              ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
+            }
             UseSU->addPred(dep);
           }
         }
@@ -231,7 +241,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() {
             SUnit *UseSU = UseList[i];
             if (UseSU != SU) {
               const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
-              ST.adjustSchedDependency((SDep &)dep);
+              if (!UnitLatencies) {
+                ComputeOperandLatency(SU, UseSU, (SDep &)dep);
+                ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
+              }
               UseSU->addPred(dep);
             }
           }
@@ -410,7 +423,7 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
 
   // Compute the latency for the node.
   SU->Latency =
-    InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass());
+    InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());
 
   // Simplistic target-independent heuristic: assume that loads take
   // extra time.
@@ -419,6 +432,50 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
       SU->Latency += 2;
 }
 
+void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, 
+                                              SDep& dep) const {
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  if (InstrItins.isEmpty())
+    return;
+  
+  // For a data dependency with a known register...
+  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
+    return;
+
+  const unsigned Reg = dep.getReg();
+
+  // ... find the definition of the register in the defining
+  // instruction
+  MachineInstr *DefMI = Def->getInstr();
+  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+  if (DefIdx != -1) {
+    int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx);
+    if (DefCycle >= 0) {
+      MachineInstr *UseMI = Use->getInstr();
+      const unsigned UseClass = UseMI->getDesc().getSchedClass();
+
+      // For all uses of the register, calculate the maxmimum latency
+      int Latency = -1;
+      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+        const MachineOperand &MO = UseMI->getOperand(i);
+        if (!MO.isReg() || !MO.isUse())
+          continue;
+        unsigned MOReg = MO.getReg();
+        if (MOReg != Reg)
+          continue;
+
+        int UseCycle = InstrItins.getOperandCycle(UseClass, i);
+        if (UseCycle >= 0)
+          Latency = std::max(Latency, DefCycle - UseCycle + 1);
+      }
+
+      // If we found a latency, then replace the existing dependence latency.
+      if (Latency >= 0)
+        dep.setLatency(Latency);
+    }
+  }
+}
+
 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
   SU->getInstr()->dump();
 }
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index 00d6268d1a1..929bdaa4b17 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -160,6 +160,12 @@ namespace llvm {
     ///
     virtual void ComputeLatency(SUnit *SU);
 
+    /// ComputeOperandLatency - Override dependence edge latency using
+    /// operand use/def information
+    ///
+    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+                                       SDep& dep) const;
+
     virtual MachineBasicBlock *EmitSchedule();
 
     /// StartBlock - Prepare to perform scheduling in the given block.
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index ca4ba565d40..a580b93c836 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -155,6 +155,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
 void ScheduleDAGSDNodes::AddSchedEdges() {
   const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
 
+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
   // Pass 2: add the preds, succs, etc.
   for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
     SUnit *SU = &SUnits[su];
@@ -212,8 +215,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
 
         const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                                OpSU->Latency, PhysReg);
-        if (!isChain)
-          ST.adjustSchedDependency((SDep &)dep);
+        if (!isChain && !UnitLatencies) {
+          ComputeOperandLatency(OpSU, SU, (SDep &)dep);
+          ST.adjustSchedDependency(OpSU, SU, (SDep &)dep);
+        }
 
         SU->addPred(dep);
       }
@@ -242,8 +247,8 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
   for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
     if (N->isMachineOpcode()) {
       SawMachineOpcode = true;
-      SU->Latency +=
-        InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass());
+      SU->Latency += InstrItins.
+        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
     }
 }
 
-- 
2.34.1