From 28a123abaf79644cc507c3086d82c86ff1aa8cd8 Mon Sep 17 00:00:00 2001
From: James Molloy <james.molloy@arm.com>
Date: Thu, 12 Feb 2015 15:54:14 +0000
Subject: [PATCH] [LoopRerolling] Be more forgiving with instruction order.

We can't solve the full subgraph isomorphism problem. But we can
allow obvious cases, where for example two instructions of different
types are out of order. Due to them having different types/opcodes,
there is no ambiguity.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228931 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/LoopRerollPass.cpp | 99 ++++++++++++++++++++----
 test/Transforms/LoopReroll/basic.ll      | 57 ++++++++++++++
 2 files changed, 139 insertions(+), 17 deletions(-)
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 83538fc7cba..852f3ed9d51 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -45,6 +45,12 @@ static cl::opt<unsigned>
 MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
   cl::desc("The maximum increment for loop rerolling"));
 
+static cl::opt<unsigned>
+NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
+                          cl::Hidden,
+                          cl::desc("The maximum number of failures to tolerate"
+                                   " during fuzzy matching. (default: 400)"));
+
 // This loop re-rolling transformation aims to transform loops like this:
 //
 // int foo(int a);
@@ -394,9 +400,14 @@ namespace {
                                 const SmallInstructionSet &Final,
                                 DenseSet<Instruction *> &Users);
 
-      UsesTy::iterator nextInstr(int Val, UsesTy &In, UsesTy::iterator I);
+      UsesTy::iterator nextInstr(int Val, UsesTy &In,
+                                 const SmallInstructionSet &Exclude,
+                                 UsesTy::iterator *StartI=nullptr);
       bool isBaseInst(Instruction *I);
       bool isRootInst(Instruction *I);
+      bool instrDependsOn(Instruction *I,
+                          UsesTy::iterator Start,
+                          UsesTy::iterator End);
 
       LoopReroll *Parent;
 
@@ -941,10 +952,16 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po
 
 }
 
+/// Get the next instruction in "In" that is a member of set Val.
+/// Start searching from StartI, and do not return anything in Exclude.
+/// If StartI is not given, start from In.begin().
 LoopReroll::DAGRootTracker::UsesTy::iterator
 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
-                                      UsesTy::iterator I) {
-  while (I != In.end() && I->second.test(Val) == 0)
+                                      const SmallInstructionSet &Exclude,
+                                      UsesTy::iterator *StartI) {
+  UsesTy::iterator I = StartI ? *StartI : In.begin();
+  while (I != In.end() && (I->second.test(Val) == 0 ||
+                           Exclude.count(I->first) != 0))
     ++I;
   return I;
 }
@@ -965,6 +982,19 @@ bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
   return false;
 }
 
+/// Return true if instruction I depends on any instruction between
+/// Start and End.
+bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
+                                                UsesTy::iterator Start,
+                                                UsesTy::iterator End) {
+  for (auto *U : I->users()) {
+    for (auto It = Start; It != End; ++It)
+      if (U == It->first)
+        return true;
+  }
+  return false;
+}
+
 bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
   // We now need to check for equivalence of the use graph of each root with
   // that of the primary induction variable (excluding the roots). Our goal
@@ -1022,8 +1052,9 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
     DenseMap<Value *, Value *> BaseMap;
 
     // Compare iteration Iter to the base.
-    auto BaseIt = nextInstr(0, Uses, Uses.begin());
-    auto RootIt = nextInstr(Iter, Uses, Uses.begin());
+    SmallInstructionSet Visited;
+    auto BaseIt = nextInstr(0, Uses, Visited);
+    auto RootIt = nextInstr(Iter, Uses, Visited);
     auto LastRootIt = Uses.begin();
 
     while (BaseIt != Uses.end() && RootIt != Uses.end()) {
@@ -1033,20 +1064,58 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
       // Skip over the IV or root instructions; only match their users.
       bool Continue = false;
       if (isBaseInst(BaseInst)) {
-        BaseIt = nextInstr(0, Uses, ++BaseIt);
+        Visited.insert(BaseInst);
+        BaseIt = nextInstr(0, Uses, Visited);
         Continue = true;
       }
       if (isRootInst(RootInst)) {
         LastRootIt = RootIt;
-        RootIt = nextInstr(Iter, Uses, ++RootIt);
+        Visited.insert(RootInst);
+        RootIt = nextInstr(Iter, Uses, Visited);
         Continue = true;
       }
       if (Continue) continue;
 
+      if (!BaseInst->isSameOperationAs(RootInst)) {
+        // Last chance saloon. We don't try and solve the full isomorphism
+        // problem, but try and at least catch the case where two instructions
+        // *of different types* are round the wrong way. We won't be able to
+        // efficiently tell, given two ADD instructions, which way around we
+        // should match them, but given an ADD and a SUB, we can at least infer
+        // which one is which.
+        //
+        // This should allow us to deal with a greater subset of the isomorphism
+        // problem. It does however change a linear algorithm into a quadratic
+        // one, so limit the number of probes we do.
+        auto TryIt = RootIt;
+        unsigned N = NumToleratedFailedMatches;
+        while (TryIt != Uses.end() &&
+               !BaseInst->isSameOperationAs(TryIt->first) &&
+               N--) {
+          ++TryIt;
+          TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
+        }
+
+        if (TryIt == Uses.end() || TryIt == RootIt ||
+            instrDependsOn(TryIt->first, RootIt, TryIt)) {
+          DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+                " vs. " << *RootInst << "\n");
+          return false;
+        }
+        
+        RootIt = TryIt;
+        RootInst = TryIt->first;
+      }
+
       // All instructions between the last root and this root
-      // belong to some other iteration. If they belong to a 
+      // may belong to some other iteration. If they belong to a 
       // future iteration, then they're dangerous to alias with.
-      for (; LastRootIt != RootIt; ++LastRootIt) {
+      // 
+      // Note that because we allow a limited amount of flexibility in the order
+      // that we visit nodes, LastRootIt might be *before* RootIt, in which
+      // case we've already checked this set of instructions so we shouldn't
+      // do anything.
+      for (; LastRootIt < RootIt; ++LastRootIt) {
         Instruction *I = LastRootIt->first;
         if (LastRootIt->second.find_first() < (int)Iter)
           continue;
@@ -1062,12 +1131,6 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
           FutureSideEffects = true;
       }
 
-      if (!BaseInst->isSameOperationAs(RootInst)) {
-        DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
-              " vs. " << *RootInst << "\n");
-        return false;
-      }
-
       // Make sure that this instruction, which is in the use set of this
       // root instruction, does not also belong to the base set or the set of
       // some other root instruction.
@@ -1174,8 +1237,10 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
       BaseMap.insert(std::make_pair(RootInst, BaseInst));
 
       LastRootIt = RootIt;
-      BaseIt = nextInstr(0, Uses, ++BaseIt);
-      RootIt = nextInstr(Iter, Uses, ++RootIt);
+      Visited.insert(BaseInst);
+      Visited.insert(RootInst);
+      BaseIt = nextInstr(0, Uses, Visited);
+      RootIt = nextInstr(Iter, Uses, Visited);
     }
     assert (BaseIt == Uses.end() && RootIt == Uses.end() &&
             "Mismatched set sizes!");
diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll
index 92bf2946be2..f30f417f17a 100644
--- a/test/Transforms/LoopReroll/basic.ll
+++ b/test/Transforms/LoopReroll/basic.ll
@@ -488,6 +488,63 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
+; int foo(int a);
+; void bar2(int *x, int y, int z) {
+;   for (int i = 0; i < 500; i += 3) {
+;     foo(i+y+i*z); // Slightly reordered instruction order
+;     foo(i+1+y+(i+1)*z);
+;     foo(i+2+y+(i+2)*z);
+;   }
+; }
+
+; Function Attrs: nounwind uwtable
+define void @bar2(i32* nocapture readnone %x, i32 %y, i32 %z) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+
+  %tmp1 = add i32 %i.08, %y
+  %tmp2 = mul i32 %i.08, %z
+  %tmp3 = add i32 %tmp2, %tmp1
+  %call = tail call i32 @foo(i32 %tmp3) #1
+
+  %add = add nsw i32 %i.08, 1
+  %tmp2a = mul i32 %add, %z
+  %tmp1a = add i32 %add, %y
+  %tmp3a = add i32 %tmp2a, %tmp1a
+  %calla = tail call i32 @foo(i32 %tmp3a) #1
+  
+  %add2 = add nsw i32 %i.08, 2
+  %tmp2b = mul i32 %add2, %z
+  %tmp1b = add i32 %add2, %y
+  %tmp3b = add i32 %tmp2b, %tmp1b
+  %callb = tail call i32 @foo(i32 %tmp3b) #1
+
+  %add3 = add nsw i32 %i.08, 3
+
+  %exitcond = icmp eq i32 %add3, 500
+  br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK-LABEL: @bar2
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %tmp1 = add i32 %indvar, %y
+; CHECK: %tmp2 = mul i32 %indvar, %z
+; CHECK: %tmp3 = add i32 %tmp2, %tmp1
+; CHECK: %call = tail call i32 @foo(i32 %tmp3) #1
+; CHECK: %indvar.next = add i32 %indvar, 1
+; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
+; CHECK: br i1 %exitcond1, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
 
 attributes #0 = { nounwind uwtable }
 attributes #1 = { nounwind }
-- 
2.34.1