[LoopRerolling] Be more forgiving with instruction order.

author James Molloy <james.molloy@arm.com>

Thu, 12 Feb 2015 15:54:14 +0000 (15:54 +0000)

committer James Molloy <james.molloy@arm.com>

Thu, 12 Feb 2015 15:54:14 +0000 (15:54 +0000)
author James Molloy <james.molloy@arm.com>
Thu, 12 Feb 2015 15:54:14 +0000 (15:54 +0000)
committer James Molloy <james.molloy@arm.com>
Thu, 12 Feb 2015 15:54:14 +0000 (15:54 +0000)
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp

index 83538fc7cba986b1e22493571c2c7c8637d59d5a..852f3ed9d51a6c6ffd1335f32380501cd9d063bc 100644 (file)
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -45,6 +45,12 @@ static cl::opt<unsigned>
  MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
    cl::desc("The maximum increment for loop rerolling"));
  
+static cl::opt<unsigned>
+NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
+                          cl::Hidden,
+                          cl::desc("The maximum number of failures to tolerate"
+                                   " during fuzzy matching. (default: 400)"));
+
  // This loop re-rolling transformation aims to transform loops like this:
  //
  // int foo(int a);
@@ -394,9 +400,14 @@ namespace {
                                  const SmallInstructionSet &Final,
                                  DenseSet<Instruction *> &Users);
  
-      UsesTy::iterator nextInstr(int Val, UsesTy &In, UsesTy::iterator I);
+      UsesTy::iterator nextInstr(int Val, UsesTy &In,
+                                 const SmallInstructionSet &Exclude,
+                                 UsesTy::iterator *StartI=nullptr);
        bool isBaseInst(Instruction *I);
        bool isRootInst(Instruction *I);
+      bool instrDependsOn(Instruction *I,
+                          UsesTy::iterator Start,
+                          UsesTy::iterator End);
  
        LoopReroll *Parent;
  
@@ -941,10 +952,16 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po
  
  }
  
+/// Get the next instruction in "In" that is a member of set Val.
+/// Start searching from StartI, and do not return anything in Exclude.
+/// If StartI is not given, start from In.begin().
  LoopReroll::DAGRootTracker::UsesTy::iterator
  LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
-                                      UsesTy::iterator I) {
-  while (I != In.end() && I->second.test(Val) == 0)
+                                      const SmallInstructionSet &Exclude,
+                                      UsesTy::iterator *StartI) {
+  UsesTy::iterator I = StartI ? *StartI : In.begin();
+  while (I != In.end() && (I->second.test(Val) == 0 ||
+                           Exclude.count(I->first) != 0))
      ++I;
    return I;
  }
@@ -965,6 +982,19 @@ bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
    return false;
  }
  
+/// Return true if instruction I depends on any instruction between
+/// Start and End.
+bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
+                                                UsesTy::iterator Start,
+                                                UsesTy::iterator End) {
+  for (auto *U : I->users()) {
+    for (auto It = Start; It != End; ++It)
+      if (U == It->first)
+        return true;
+  }
+  return false;
+}
+
  bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
    // We now need to check for equivalence of the use graph of each root with
    // that of the primary induction variable (excluding the roots). Our goal
@@ -1022,8 +1052,9 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
      DenseMap<Value *, Value *> BaseMap;
  
      // Compare iteration Iter to the base.
-    auto BaseIt = nextInstr(0, Uses, Uses.begin());
-    auto RootIt = nextInstr(Iter, Uses, Uses.begin());
+    SmallInstructionSet Visited;
+    auto BaseIt = nextInstr(0, Uses, Visited);
+    auto RootIt = nextInstr(Iter, Uses, Visited);
      auto LastRootIt = Uses.begin();
  
      while (BaseIt != Uses.end() && RootIt != Uses.end()) {
@@ -1033,20 +1064,58 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
        // Skip over the IV or root instructions; only match their users.
        bool Continue = false;
        if (isBaseInst(BaseInst)) {
-        BaseIt = nextInstr(0, Uses, ++BaseIt);
+        Visited.insert(BaseInst);
+        BaseIt = nextInstr(0, Uses, Visited);
          Continue = true;
        }
        if (isRootInst(RootInst)) {
          LastRootIt = RootIt;
-        RootIt = nextInstr(Iter, Uses, ++RootIt);
+        Visited.insert(RootInst);
+        RootIt = nextInstr(Iter, Uses, Visited);
          Continue = true;
        }
        if (Continue) continue;
  
+      if (!BaseInst->isSameOperationAs(RootInst)) {
+        // Last chance saloon. We don't try and solve the full isomorphism
+        // problem, but try and at least catch the case where two instructions
+        // *of different types* are round the wrong way. We won't be able to
+        // efficiently tell, given two ADD instructions, which way around we
+        // should match them, but given an ADD and a SUB, we can at least infer
+        // which one is which.
+        //
+        // This should allow us to deal with a greater subset of the isomorphism
+        // problem. It does however change a linear algorithm into a quadratic
+        // one, so limit the number of probes we do.
+        auto TryIt = RootIt;
+        unsigned N = NumToleratedFailedMatches;
+        while (TryIt != Uses.end() &&
+               !BaseInst->isSameOperationAs(TryIt->first) &&
+               N--) {
+          ++TryIt;
+          TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
+        }
+
+        if (TryIt == Uses.end() || TryIt == RootIt ||
+            instrDependsOn(TryIt->first, RootIt, TryIt)) {
+          DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+                " vs. " << *RootInst << "\n");
+          return false;
+        }
+        
+        RootIt = TryIt;
+        RootInst = TryIt->first;
+      }
+
        // All instructions between the last root and this root
-      // belong to some other iteration. If they belong to a 
+      // may belong to some other iteration. If they belong to a 
        // future iteration, then they're dangerous to alias with.
-      for (; LastRootIt != RootIt; ++LastRootIt) {
+      // 
+      // Note that because we allow a limited amount of flexibility in the order
+      // that we visit nodes, LastRootIt might be *before* RootIt, in which
+      // case we've already checked this set of instructions so we shouldn't
+      // do anything.
+      for (; LastRootIt < RootIt; ++LastRootIt) {
          Instruction *I = LastRootIt->first;
          if (LastRootIt->second.find_first() < (int)Iter)
            continue;
@@ -1062,12 +1131,6 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
            FutureSideEffects = true;
        }
  
-      if (!BaseInst->isSameOperationAs(RootInst)) {
-        DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
-              " vs. " << *RootInst << "\n");
-        return false;
-      }
-
        // Make sure that this instruction, which is in the use set of this
        // root instruction, does not also belong to the base set or the set of
        // some other root instruction.
@@ -1174,8 +1237,10 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
        BaseMap.insert(std::make_pair(RootInst, BaseInst));
  
        LastRootIt = RootIt;
-      BaseIt = nextInstr(0, Uses, ++BaseIt);
-      RootIt = nextInstr(Iter, Uses, ++RootIt);
+      Visited.insert(BaseInst);
+      Visited.insert(RootInst);
+      BaseIt = nextInstr(0, Uses, Visited);
+      RootIt = nextInstr(Iter, Uses, Visited);
      }
      assert (BaseIt == Uses.end() && RootIt == Uses.end() &&
              "Mismatched set sizes!");
diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll

index 92bf2946be2ccd8ab9d803d2a23851e8ad8febc3..f30f417f17a51773fd995f6fc71a6dfad22e23c1 100644 (file)
--- a/test/Transforms/LoopReroll/basic.ll
+++ b/test/Transforms/LoopReroll/basic.ll
@@ -488,6 +488,63 @@ for.end:                                          ; preds = %for.body
    ret void
  }
  
+; int foo(int a);
+; void bar2(int *x, int y, int z) {
+;   for (int i = 0; i < 500; i += 3) {
+;     foo(i+y+i*z); // Slightly reordered instruction order
+;     foo(i+1+y+(i+1)*z);
+;     foo(i+2+y+(i+2)*z);
+;   }
+; }
+
+; Function Attrs: nounwind uwtable
+define void @bar2(i32* nocapture readnone %x, i32 %y, i32 %z) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+
+  %tmp1 = add i32 %i.08, %y
+  %tmp2 = mul i32 %i.08, %z
+  %tmp3 = add i32 %tmp2, %tmp1
+  %call = tail call i32 @foo(i32 %tmp3) #1
+
+  %add = add nsw i32 %i.08, 1
+  %tmp2a = mul i32 %add, %z
+  %tmp1a = add i32 %add, %y
+  %tmp3a = add i32 %tmp2a, %tmp1a
+  %calla = tail call i32 @foo(i32 %tmp3a) #1
+  
+  %add2 = add nsw i32 %i.08, 2
+  %tmp2b = mul i32 %add2, %z
+  %tmp1b = add i32 %add2, %y
+  %tmp3b = add i32 %tmp2b, %tmp1b
+  %callb = tail call i32 @foo(i32 %tmp3b) #1
+
+  %add3 = add nsw i32 %i.08, 3
+
+  %exitcond = icmp eq i32 %add3, 500
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK-LABEL: @bar2
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %tmp1 = add i32 %indvar, %y
+; CHECK: %tmp2 = mul i32 %indvar, %z
+; CHECK: %tmp3 = add i32 %tmp2, %tmp1
+; CHECK: %call = tail call i32 @foo(i32 %tmp3) #1
+; CHECK: %indvar.next = add i32 %indvar, 1
+; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
+; CHECK: br i1 %exitcond1, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
  
  attributes #0 = { nounwind uwtable }
  attributes #1 = { nounwind }
author	James Molloy <james.molloy@arm.com>
	Thu, 12 Feb 2015 15:54:14 +0000 (15:54 +0000)
committer	James Molloy <james.molloy@arm.com>
	Thu, 12 Feb 2015 15:54:14 +0000 (15:54 +0000)
lib/Transforms/Scalar/LoopRerollPass.cpp		patch \| blob \| history
test/Transforms/LoopReroll/basic.ll		patch \| blob \| history