MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
cl::desc("The maximum increment for loop rerolling"));
+static cl::opt<unsigned>
+NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
+ cl::Hidden,
+ cl::desc("The maximum number of failures to tolerate"
+ " during fuzzy matching. (default: 400)"));
+
// This loop re-rolling transformation aims to transform loops like this:
//
// int foo(int a);
const SmallInstructionSet &Final,
DenseSet<Instruction *> &Users);
- UsesTy::iterator nextInstr(int Val, UsesTy &In, UsesTy::iterator I);
+ UsesTy::iterator nextInstr(int Val, UsesTy &In,
+ const SmallInstructionSet &Exclude,
+ UsesTy::iterator *StartI=nullptr);
bool isBaseInst(Instruction *I);
bool isRootInst(Instruction *I);
+ bool instrDependsOn(Instruction *I,
+ UsesTy::iterator Start,
+ UsesTy::iterator End);
LoopReroll *Parent;
}
+/// Get the next instruction in "In" that is a member of set Val.
+/// Start searching from StartI, and do not return anything in Exclude.
+/// If StartI is not given, start from In.begin().
LoopReroll::DAGRootTracker::UsesTy::iterator
LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
- UsesTy::iterator I) {
- while (I != In.end() && I->second.test(Val) == 0)
+ const SmallInstructionSet &Exclude,
+ UsesTy::iterator *StartI) {
+ UsesTy::iterator I = StartI ? *StartI : In.begin();
+ while (I != In.end() && (I->second.test(Val) == 0 ||
+ Exclude.count(I->first) != 0))
++I;
return I;
}
return false;
}
+/// Return true if instruction I depends on any instruction between
+/// Start and End.
+bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
+ UsesTy::iterator Start,
+ UsesTy::iterator End) {
+ for (auto *U : I->users()) {
+ for (auto It = Start; It != End; ++It)
+ if (U == It->first)
+ return true;
+ }
+ return false;
+}
+
bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// We now need to check for equivalence of the use graph of each root with
// that of the primary induction variable (excluding the roots). Our goal
DenseMap<Value *, Value *> BaseMap;
// Compare iteration Iter to the base.
- auto BaseIt = nextInstr(0, Uses, Uses.begin());
- auto RootIt = nextInstr(Iter, Uses, Uses.begin());
+ SmallInstructionSet Visited;
+ auto BaseIt = nextInstr(0, Uses, Visited);
+ auto RootIt = nextInstr(Iter, Uses, Visited);
auto LastRootIt = Uses.begin();
while (BaseIt != Uses.end() && RootIt != Uses.end()) {
// Skip over the IV or root instructions; only match their users.
bool Continue = false;
if (isBaseInst(BaseInst)) {
- BaseIt = nextInstr(0, Uses, ++BaseIt);
+ Visited.insert(BaseInst);
+ BaseIt = nextInstr(0, Uses, Visited);
Continue = true;
}
if (isRootInst(RootInst)) {
LastRootIt = RootIt;
- RootIt = nextInstr(Iter, Uses, ++RootIt);
+ Visited.insert(RootInst);
+ RootIt = nextInstr(Iter, Uses, Visited);
Continue = true;
}
if (Continue) continue;
+ if (!BaseInst->isSameOperationAs(RootInst)) {
+ // Last chance saloon. We don't try and solve the full isomorphism
+ // problem, but try and at least catch the case where two instructions
+ // *of different types* are round the wrong way. We won't be able to
+ // efficiently tell, given two ADD instructions, which way around we
+ // should match them, but given an ADD and a SUB, we can at least infer
+ // which one is which.
+ //
+ // This should allow us to deal with a greater subset of the isomorphism
+ // problem. It does however change a linear algorithm into a quadratic
+ // one, so limit the number of probes we do.
+ auto TryIt = RootIt;
+ unsigned N = NumToleratedFailedMatches;
+ while (TryIt != Uses.end() &&
+ !BaseInst->isSameOperationAs(TryIt->first) &&
+ N--) {
+ ++TryIt;
+ TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
+ }
+
+ if (TryIt == Uses.end() || TryIt == RootIt ||
+ instrDependsOn(TryIt->first, RootIt, TryIt)) {
+ DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
+ " vs. " << *RootInst << "\n");
+ return false;
+ }
+
+ RootIt = TryIt;
+ RootInst = TryIt->first;
+ }
+
// All instructions between the last root and this root
- // belong to some other iteration. If they belong to a
+ // may belong to some other iteration. If they belong to a
// future iteration, then they're dangerous to alias with.
- for (; LastRootIt != RootIt; ++LastRootIt) {
+ //
+ // Note that because we allow a limited amount of flexibility in the order
+ // that we visit nodes, LastRootIt might be *before* RootIt, in which
+ // case we've already checked this set of instructions so we shouldn't
+ // do anything.
+ for (; LastRootIt < RootIt; ++LastRootIt) {
Instruction *I = LastRootIt->first;
if (LastRootIt->second.find_first() < (int)Iter)
continue;
FutureSideEffects = true;
}
- if (!BaseInst->isSameOperationAs(RootInst)) {
- DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
- " vs. " << *RootInst << "\n");
- return false;
- }
-
// Make sure that this instruction, which is in the use set of this
// root instruction, does not also belong to the base set or the set of
// some other root instruction.
BaseMap.insert(std::make_pair(RootInst, BaseInst));
LastRootIt = RootIt;
- BaseIt = nextInstr(0, Uses, ++BaseIt);
- RootIt = nextInstr(Iter, Uses, ++RootIt);
+ Visited.insert(BaseInst);
+ Visited.insert(RootInst);
+ BaseIt = nextInstr(0, Uses, Visited);
+ RootIt = nextInstr(Iter, Uses, Visited);
}
assert (BaseIt == Uses.end() && RootIt == Uses.end() &&
"Mismatched set sizes!");
ret void
}
+; int foo(int a);
+; void bar2(int *x, int y, int z) {
+; for (int i = 0; i < 500; i += 3) {
+; foo(i+y+i*z); // Slightly reordered instruction order
+; foo(i+1+y+(i+1)*z);
+; foo(i+2+y+(i+2)*z);
+; }
+; }
+
+; Function Attrs: nounwind uwtable
+define void @bar2(i32* nocapture readnone %x, i32 %y, i32 %z) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+
+ %tmp1 = add i32 %i.08, %y
+ %tmp2 = mul i32 %i.08, %z
+ %tmp3 = add i32 %tmp2, %tmp1
+ %call = tail call i32 @foo(i32 %tmp3) #1
+
+ %add = add nsw i32 %i.08, 1
+ %tmp2a = mul i32 %add, %z
+ %tmp1a = add i32 %add, %y
+ %tmp3a = add i32 %tmp2a, %tmp1a
+ %calla = tail call i32 @foo(i32 %tmp3a) #1
+
+ %add2 = add nsw i32 %i.08, 2
+ %tmp2b = mul i32 %add2, %z
+ %tmp1b = add i32 %add2, %y
+ %tmp3b = add i32 %tmp2b, %tmp1b
+ %callb = tail call i32 @foo(i32 %tmp3b) #1
+
+ %add3 = add nsw i32 %i.08, 3
+
+ %exitcond = icmp eq i32 %add3, 500
+ br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK-LABEL: @bar2
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %tmp1 = add i32 %indvar, %y
+; CHECK: %tmp2 = mul i32 %indvar, %z
+; CHECK: %tmp3 = add i32 %tmp2, %tmp1
+; CHECK: %call = tail call i32 @foo(i32 %tmp3) #1
+; CHECK: %indvar.next = add i32 %indvar, 1
+; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
+; CHECK: br i1 %exitcond1, label %for.end, label %for.body
+
+; CHECK: ret
+
+for.end: ; preds = %for.body
+ ret void
+}
+
attributes #0 = { nounwind uwtable }
attributes #1 = { nounwind }