MISched: add dependence to ExitSU to model live-out latency.

author Andrew Trick <atrick@apple.com>

Tue, 18 Dec 2012 20:53:01 +0000 (20:53 +0000)

committer Andrew Trick <atrick@apple.com>

Tue, 18 Dec 2012 20:53:01 +0000 (20:53 +0000)
author Andrew Trick <atrick@apple.com>
Tue, 18 Dec 2012 20:53:01 +0000 (20:53 +0000)
committer Andrew Trick <atrick@apple.com>
Tue, 18 Dec 2012 20:53:01 +0000 (20:53 +0000)
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp

index ef33b123675428b001e1e1b50fece64e9bbe2a20..ebb80a736d2d2b0c94b3975acff7b3a908c970ef 100644 (file)
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -765,6 +765,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
      assert(SU && "No SUnit mapped to this MI");
  
      // Add register-based dependencies (data, anti, and output).
+    bool HasVRegDef = false;
      for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
        const MachineOperand &MO = MI->getOperand(j);
        if (!MO.isReg()) continue;
@@ -775,12 +776,26 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
          addPhysRegDeps(SU, j);
        else {
          assert(!IsPostRA && "Virtual register encountered!");
-        if (MO.isDef())
+        if (MO.isDef()) {
+          HasVRegDef = true;
            addVRegDefDeps(SU, j);
+        }
          else if (MO.readsReg()) // ignore undef operands
            addVRegUseDeps(SU, j);
        }
      }
+    // If we haven't seen any uses in this scheduling region, create a
+    // dependence edge to ExitSU to model the live-out latency. This is required
+    // for vreg defs with no in-region use, and prefetches with no vreg def.
+    //
+    // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
+    // check currently relies on being called before adding chain deps.
+    if (SU->NumSuccs == 0 && SU->Latency > 1
+        && (HasVRegDef || MI->mayLoad())) {
+      SDep Dep(SU, SDep::Artificial);
+      Dep.setLatency(SU->Latency - 1);
+      ExitSU.addPred(Dep);
+    }
  
      // Add chain dependencies.
      // Chain dependencies used to enforce memory order should have
diff --git a/test/CodeGen/ARM/misched-inorder-latency.ll b/test/CodeGen/ARM/misched-inorder-latency.ll

new file mode 100644 (file)

index 0000000..8c06b4c
--- /dev/null
+++ b/test/CodeGen/ARM/misched-inorder-latency.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -enable-misched -march=thumb -mcpu=swift \
+; RUN:          -pre-RA-sched=source -scheditins=false -ilp-window=0 \
+; RUN:          -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
+;
+; For these tests, we set -ilp-window=0 to simulate an in-order processor.
+
+; %val1 is a 3-cycle load live out of %entry. It should be hoisted
+; above the add.
+; CHECK: @testload
+; CHECK: %entry
+; CHECK: ldr
+; CHECK: adds
+; CHECK: bne
+; CHECK: %true
+define i32 @testload(i32 *%ptr, i32 %sumin) {
+entry:
+  %sum1 = add i32 %sumin, 1
+  %val1 = load i32* %ptr
+  %p = icmp eq i32 %sumin, 0
+  br i1 %p, label %true, label %end
+true:
+  %sum2 = add i32 %sum1, 1
+  %ptr2 = getelementptr i32* %ptr, i32 1
+  %val = load i32* %ptr2
+  %val2 = add i32 %val1, %val
+  br label %end
+end:
+  %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ]
+  %summerge = phi i32 [ %sum1, %entry], [ %sum2, %true ]
+  %sumout = add i32 %valmerge, %summerge
+  ret i32 %sumout
+}
+
+; The prefetch gets a default latency of 3 cycles and should be hoisted
+; above the add.
+;
+; CHECK: @testprefetch
+; CHECK: %entry
+; CHECK: pld
+; CHECK: adds
+; CHECK: bx
+define i32 @testprefetch(i8 *%ptr, i32 %i) {
+entry:
+  %tmp = add i32 %i, 1
+  tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
+  ret i32 %tmp
+}
+declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
author	Andrew Trick <atrick@apple.com>
	Tue, 18 Dec 2012 20:53:01 +0000 (20:53 +0000)
committer	Andrew Trick <atrick@apple.com>
	Tue, 18 Dec 2012 20:53:01 +0000 (20:53 +0000)
lib/CodeGen/ScheduleDAGInstrs.cpp		patch \| blob \| history
test/CodeGen/ARM/misched-inorder-latency.ll	[new file with mode: 0644]	patch \| blob