Issue description:

[oota-llvm.git] / lib / CodeGen / ScheduleDAGInstrs.cpp
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp

index 6e80335f4bc81e581fdc0d7b5403c32cfb841c78..aa45a6861cabfe8904f9a5533ab823dc00ac5a71 100644 (file)
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -44,14 +44,15 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                       const MachineDominatorTree &mdt,
                                       bool IsPostRAFlag,
                                       LiveIntervals *lis)
-  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
-    InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
-    IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false),
-    LoopRegs(MDT), FirstDbgValue(0) {
+  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
+    IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
    assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
    DbgValues.clear();
    assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
           "Virtual registers must be removed prior to PostRA scheduling");
+
+  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+  SchedModel.init(*ST.getSchedModel(), &ST, TII);
  }
  
  /// getUnderlyingObjectFromInt - This is the function that does the work of
@@ -135,10 +136,6 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
  
  void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
    BB = bb;
-  LoopRegs.Deps.clear();
-  if (MachineLoop *ML = MLI.getLoopFor(BB))
-    if (BB == ML->getLoopLatch())
-      LoopRegs.VisitLoop(ML);
  }
  
  void ScheduleDAGInstrs::finishBlock() {
@@ -174,9 +171,6 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
    EndIndex = endcount;
    MISUnitMap.clear();
  
-  // Check to see if the scheduler cares about latencies.
-  UnitLatencies = forceUnitLatencies();
-
    ScheduleDAG::clearDAG();
  }
  
@@ -209,7 +203,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
        if (Reg == 0) continue;
  
        if (TRI->isPhysicalRegister(Reg))
-        Uses[Reg].push_back(&ExitSU);
+        Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
        else {
          assert(!IsPostRA && "Virtual register encountered after regalloc.");
          addVRegUseDeps(&ExitSU, i);
@@ -225,59 +219,44 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
               E = (*SI)->livein_end(); I != E; ++I) {
          unsigned Reg = *I;
          if (!Uses.contains(Reg))
-          Uses[Reg].push_back(&ExitSU);
+          Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
        }
    }
  }
  
  /// MO is an operand of SU's instruction that defines a physical register. Add
  /// data dependencies from SU to any uses of the physical register.
-void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
-                                           const MachineOperand &MO) {
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
+  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
    assert(MO.isDef() && "expect physreg def");
  
-  // Ask the target if address-backscheduling is desirable, and if so how much.
+  // Subtarget hook, used below to let the target adjust each data dependence.
    const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
-  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
-  unsigned DataLatency = SU->Latency;
  
    for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
         Alias.isValid(); ++Alias) {
      if (!Uses.contains(*Alias))
        continue;
-    std::vector<SUnit*> &UseList = Uses[*Alias];
+    std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
      for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
-      SUnit *UseSU = UseList[i];
+      SUnit *UseSU = UseList[i].SU;
        if (UseSU == SU)
          continue;
-      unsigned LDataLatency = DataLatency;
-      // Optionally add in a special extra latency for nodes that
-      // feed addresses.
-      // TODO: Perhaps we should get rid of
-      // SpecialAddressLatency and just move this into
-      // adjustSchedDependency for the targets that care about it.
-      if (SpecialAddressLatency != 0 && !UnitLatencies &&
-          UseSU != &ExitSU) {
-        MachineInstr *UseMI = UseSU->getInstr();
-        const MCInstrDesc &UseMCID = UseMI->getDesc();
-        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
-        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
-        if (RegUseIndex >= 0 &&
-            (UseMI->mayLoad() || UseMI->mayStore()) &&
-            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
-            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
-          LDataLatency += SpecialAddressLatency;
-      }
-      // Adjust the dependence latency using operand def/use
-      // information (if any), and then allow the target to
-      // perform its own adjustments.
-      SDep dep(SU, SDep::Data, LDataLatency, *Alias);
-      if (!UnitLatencies) {
-        unsigned Latency = computeOperandLatency(SU, UseSU, dep);
-        dep.setLatency(Latency);
-
-        ST.adjustSchedDependency(SU, UseSU, dep);
-      }
+
+      SDep dep(SU, SDep::Data, 1, *Alias);
+
+      // Adjust the dependence latency using operand def/use information,
+      // then allow the target to perform its own adjustments.
+      int UseOp = UseList[i].OpIdx;
+      MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr();
+      dep.setLatency(
+        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+                                         RegUse, UseOp, /*FindMin=*/false));
+      dep.setMinLatency(
+        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+                                         RegUse, UseOp, /*FindMin=*/true));
+
+      ST.adjustSchedDependency(SU, UseSU, dep);
        UseSU->addPred(dep);
      }
    }
@@ -301,9 +280,9 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
         Alias.isValid(); ++Alias) {
      if (!Defs.contains(*Alias))
        continue;
-    std::vector<SUnit *> &DefList = Defs[*Alias];
+    std::vector<PhysRegSUOper> &DefList = Defs[*Alias];
      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
-      SUnit *DefSU = DefList[i];
+      SUnit *DefSU = DefList[i].SU;
        if (DefSU == &ExitSU)
          continue;
        if (DefSU != SU &&
@@ -312,8 +291,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
          if (Kind == SDep::Anti)
            DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
          else {
-          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
-                                                 DefSU->getInstr());
+          unsigned AOLat =
+            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
            DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
          }
        }
@@ -324,61 +303,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
      // Either insert a new Reg2SUnits entry with an empty SUnits list, or
      // retrieve the existing SUnits list for this register's uses.
      // Push this SUnit on the use list.
-    Uses[MO.getReg()].push_back(SU);
+    Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx));
    }
    else {
-    addPhysRegDataDeps(SU, MO);
+    addPhysRegDataDeps(SU, OperIdx);
  
      // Either insert a new Reg2SUnits entry with an empty SUnits list, or
      // retrieve the existing SUnits list for this register's defs.
-    std::vector<SUnit *> &DefList = Defs[MO.getReg()];
-
-    // If a def is going to wrap back around to the top of the loop,
-    // backschedule it.
-    if (!UnitLatencies && DefList.empty()) {
-      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
-      if (I != LoopRegs.Deps.end()) {
-        const MachineOperand *UseMO = I->second.first;
-        unsigned Count = I->second.second;
-        const MachineInstr *UseMI = UseMO->getParent();
-        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
-        const MCInstrDesc &UseMCID = UseMI->getDesc();
-        const TargetSubtargetInfo &ST =
-          TM.getSubtarget<TargetSubtargetInfo>();
-        unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
-        // TODO: If we knew the total depth of the region here, we could
-        // handle the case where the whole loop is inside the region but
-        // is large enough that the isScheduleHigh trick isn't needed.
-        if (UseMOIdx < UseMCID.getNumOperands()) {
-          // Currently, we only support scheduling regions consisting of
-          // single basic blocks. Check to see if the instruction is in
-          // the same region by checking to see if it has the same parent.
-          if (UseMI->getParent() != MI->getParent()) {
-            unsigned Latency = SU->Latency;
-            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
-              Latency += SpecialAddressLatency;
-            // This is a wild guess as to the portion of the latency which
-            // will be overlapped by work done outside the current
-            // scheduling region.
-            Latency -= std::min(Latency, Count);
-            // Add the artificial edge.
-            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
-                                /*Reg=*/0, /*isNormalMemory=*/false,
-                                /*isMustAlias=*/false,
-                                /*isArtificial=*/true));
-          } else if (SpecialAddressLatency > 0 &&
-                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
-            // The entire loop body is within the current scheduling region
-            // and the latency of this operation is assumed to be greater
-            // than the latency of the loop.
-            // TODO: Recursively mark data-edge predecessors as
-            //       isScheduleHigh too.
-            SU->isScheduleHigh = true;
-          }
-        }
-        LoopRegs.Deps.erase(I);
-      }
-    }
+    std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];
  
      // clear this register's use list
      if (Uses.contains(MO.getReg()))
@@ -393,11 +325,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
      // the block. Instead, we leave only one call at the back of the
      // DefList.
      if (SU->isCall) {
-      while (!DefList.empty() && DefList.back()->isCall)
+      while (!DefList.empty() && DefList.back().SU->isCall)
          DefList.pop_back();
      }
      // Defs are pushed in the order they are visited and never reordered.
-    DefList.push_back(SU);
+    DefList.push_back(PhysRegSUOper(SU, OperIdx));
    }
  }
  
@@ -411,9 +343,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
    const MachineInstr *MI = SU->getInstr();
    unsigned Reg = MI->getOperand(OperIdx).getReg();
  
-  // SSA defs do not have output/anti dependencies.
+  // Singly defined vregs do not have output/anti dependencies.
    // The current operand is a def, so we have at least one.
-  if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
+  // Check here if there are any others...
+  if (MRI.hasOneDef(Reg))
      return;
  
    // Add output dependence to the next nearest def of this vreg.
@@ -429,8 +362,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
    else {
      SUnit *DefSU = DefI->SU;
      if (DefSU != SU && DefSU != &ExitSU) {
-      unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
-                                                  DefSU->getInstr());
+      unsigned OutLatency =
+        SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
        DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
      }
      DefI->SU = SU;
@@ -461,18 +394,17 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
      if (DefSU) {
        // The reaching Def lives within this scheduling region.
        // Create a data dependence.
-      //
-      // TODO: Handle "special" address latencies cleanly.
-      SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
-      if (!UnitLatencies) {
-        // Adjust the dependence latency using operand def/use information, then
-        // allow the target to perform its own adjustments.
-        unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
-        dep.setLatency(Latency);
-
-        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
-        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
-      }
+      SDep dep(DefSU, SDep::Data, 1, Reg);
+      // Adjust the dependence latency using operand def/use information, then
+      // allow the target to perform its own adjustments.
+      int DefOp = Def->findRegisterDefOperandIdx(Reg);
+      dep.setLatency(
+        SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
+      dep.setMinLatency(
+        SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));
+
+      const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+      ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
        SU->addPred(dep);
      }
    }
@@ -487,7 +419,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  /// (like a call or something with unmodeled side effects).
  static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
-      (MI->hasVolatileMemoryRef() &&
+      (MI->hasOrderedMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
      return true;
    return false;
@@ -639,7 +571,8 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
  /// checks whether SU can be aliasing any node dominated
  /// by it.
  static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
-            SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList) {
+                            SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
+                            unsigned LatencyToLoad) {
    if (!SU)
      return;
  
@@ -650,9 +583,11 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
         I != IE; ++I) {
      if (SU == *I)
        continue;
-    if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr()))
-      (*I)->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+    if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
+      unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0;
+      (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0,
                           /*isNormalMemory=*/true));
+    }
      // Now go through all the chain successors and iterate from them.
      // Keep track of visited nodes.
      for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
@@ -714,10 +649,7 @@ void ScheduleDAGInstrs::initSUnits() {
      SU->isCommutable = MI->isCommutable();
  
      // Assign the Latency field of SU using target-provided information.
-    if (UnitLatencies)
-      SU->Latency = 1;
-    else
-      computeLatency(SU);
+    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
    }
  }
  
@@ -815,8 +747,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
      // after stack slots are lowered to actual addresses.
      // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
      // produce more precise dependence information.
-#define STORE_LOAD_LATENCY 1
-    unsigned TrueMemOrderLatency = 0;
+    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
      if (isGlobalMemoryObject(AA, MI)) {
        // Be conservative with these and add dependencies on all memory
        // references, even those that are known to not alias.
@@ -835,7 +766,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
        BarrierChain = SU;
        // This is a barrier event that acts as a pivotal node in the DAG,
        // so it is safe to clear list of exposed nodes.
-      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes);
+      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+                      TrueMemOrderLatency);
        RejectMemNodes.clear();
        NonAliasMemDefs.clear();
        NonAliasMemUses.clear();
@@ -843,8 +775,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
        // fall-through
      new_alias_chain:
-      // Chain all possibly aliasing memory references though SU.
+      // Chain all possibly aliasing memory references through SU.
-      if (AliasChain)
-        addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+      if (AliasChain) {
+        unsigned ChainLatency = 0;
+        if (AliasChain->getInstr()->mayLoad())
+          ChainLatency = TrueMemOrderLatency;
+        addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
+                           ChainLatency);
+      }
        AliasChain = SU;
        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
          addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
@@ -858,13 +795,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
            addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
                               TrueMemOrderLatency);
        }
-      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes);
+      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+                      TrueMemOrderLatency);
        PendingLoads.clear();
        AliasMemDefs.clear();
        AliasMemUses.clear();
      } else if (MI->mayStore()) {
        bool MayAlias = true;
-      TrueMemOrderLatency = STORE_LOAD_LATENCY;
        if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A store to a specific PseudoSourceValue. Add precise dependencies.
          // Record the def in MemDefs, first adding a dep if there is
@@ -905,7 +842,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
              addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
            // But we also should check dependent instructions for the
            // SU in question.
-          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes);
+          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+                          TrueMemOrderLatency);
          }
          // Add dependence on barrier chain, if needed.
          // There is no point to check aliasing on barrier event. Even if
@@ -927,7 +865,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                              /*isArtificial=*/true));
      } else if (MI->mayLoad()) {
        bool MayAlias = true;
-      TrueMemOrderLatency = 0;
        if (MI->isInvariantLoad(AA)) {
          // Invariant load, no chain dependencies needed!
        } else {
@@ -955,7 +892,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
            MayAlias = true;
          }
          if (MayAlias)
-          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes);
+          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
          // Add dependencies on alias and barrier chains, if needed.
          if (MayAlias && AliasChain)
            addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
@@ -973,34 +910,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
    PendingLoads.clear();
  }
  
-void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
-  // Compute the latency for the node. We only provide a default for missing
-  // itineraries. Empty itineraries still have latency properties.
-  if (!InstrItins) {
-    SU->Latency = 1;
-
-    // Simplistic target-independent heuristic: assume that loads take
-    // extra time.
-    if (SU->getInstr()->mayLoad())
-      SU->Latency += 2;
-  } else {
-    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
-  }
-}
-
-unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
-                                                  const SDep& dep,
-                                                  bool FindMin) const {
-  // For a data dependency with a known register...
-  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
-    return 1;
-
-  return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(),
-                                    Use->getInstr(), dep.getReg(), FindMin);
-}
-
  void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    SU->getInstr()->dump();
+#endif
  }
  
  std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {