MachineScheduler: Restrict macroop fusion to data-dependent instructions.

author Matthias Braun <matze@braunis.de>

Mon, 20 Jul 2015 22:34:44 +0000 (22:34 +0000)

committer Matthias Braun <matze@braunis.de>

Mon, 20 Jul 2015 22:34:44 +0000 (22:34 +0000)
author Matthias Braun <matze@braunis.de>
Mon, 20 Jul 2015 22:34:44 +0000 (22:34 +0000)
committer Matthias Braun <matze@braunis.de>
Mon, 20 Jul 2015 22:34:44 +0000 (22:34 +0000)
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp

index a48e54caf3fe5930a37e721e8ce2468511d6e520..f4823681cf29b2f8508c362cdaf4363c56e6bc52 100644 (file)
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1349,25 +1349,49 @@ namespace {
  /// \brief Post-process the DAG to create cluster edges between instructions
  /// that may be fused by the processor into a single operation.
  class MacroFusion : public ScheduleDAGMutation {
-  const TargetInstrInfo *TII;
+  const TargetInstrInfo &TII;
+  const TargetRegisterInfo &TRI;
  public:
-  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
+  MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
+    : TII(TII), TRI(TRI) {}
  
    void apply(ScheduleDAGMI *DAG) override;
  };
  } // anonymous
  
+/// Returns true if \p MI reads a register written by \p Other.
+static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
+                       const MachineInstr &Other) {
+  for (const MachineOperand &MO : MI.uses()) {
+    if (!MO.isReg() || !MO.readsReg())
+      continue;
+
+    unsigned Reg = MO.getReg();
+    if (Other.modifiesRegister(Reg, &TRI))
+      return true;
+  }
+  return false;
+}
+
  /// \brief Callback from DAG postProcessing to create cluster edges to encourage
  /// fused operations.
  void MacroFusion::apply(ScheduleDAGMI *DAG) {
    // For now, assume targets can only fuse with the branch.
-  MachineInstr *Branch = DAG->ExitSU.getInstr();
+  SUnit &ExitSU = DAG->ExitSU;
+  MachineInstr *Branch = ExitSU.getInstr();
    if (!Branch)
      return;
  
-  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
-    SUnit *SU = &DAG->SUnits[--Idx];
-    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
+  for (SUnit &SU : DAG->SUnits) {
+    // SUnits with successors can't be scheduled in front of the ExitSU.
+    if (!SU.Succs.empty())
+      continue;
+    // We only care if the node writes to a register that the branch reads.
+    MachineInstr *Pred = SU.getInstr();
+    if (!HasDataDep(TRI, *Branch, *Pred))
+      continue;
+
+    if (!TII.shouldScheduleAdjacent(Pred, Branch))
        continue;
  
      // Create a single weak edge from SU to ExitSU. The only effect is to cause
@@ -1376,11 +1400,11 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
      // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
      // of SU, we could create an artificial edge from the deepest root, but it
      // hasn't been needed yet.
-    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
+    bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
      (void)Success;
      assert(Success && "No DAG nodes should be reachable from ExitSU");
  
-    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
+    DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
      break;
    }
  }
@@ -2887,7 +2911,7 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
    if (EnableLoadCluster && DAG->TII->enableClusterLoads())
      DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
    if (EnableMacroFusion)
-    DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
+    DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
    return DAG;
  }
  
diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll

index 60fb74024edde51a66ada0aec81296cdf3933e07..446e5ce8cc4a43e65f281e729c78e99d676fbbcb 100644 (file)
--- a/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -104,11 +104,14 @@ if.end:                                           ; preds = %if.then, %lor.lhs.f
  ; Speculatively execute division by zero.
  ; The sdiv/udiv instructions do not trap when the divisor is zero, so they are
  ; safe to speculate.
-; CHECK: speculate_division
-; CHECK-NOT: cmp
-; CHECK: sdiv
-; CHECK: cmp
-; CHECK-NEXT: ccmp
+; CHECK-LABEL: speculate_division:
+; CHECK: cmp w0, #1
+; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0
+; CHECK: ccmp [[DIVRES]], #16, #0, ge
+; CHECK: b.gt [[BLOCK:LBB[0-9_]+]]
+; CHECK: bl _foo
+; CHECK: [[BLOCK]]:
+; CHECK: orr w0, wzr, #0x7
  define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
  entry:
    %cmp = icmp sgt i32 %a, 0
author	Matthias Braun <matze@braunis.de>
	Mon, 20 Jul 2015 22:34:44 +0000 (22:34 +0000)
committer	Matthias Braun <matze@braunis.de>
	Mon, 20 Jul 2015 22:34:44 +0000 (22:34 +0000)
lib/CodeGen/MachineScheduler.cpp		patch \| blob \| history
test/CodeGen/AArch64/arm64-ccmp.ll		patch \| blob \| history