From 1ad3dfdf95a5f4ada4f178ce6cb81d5ae3fb42d3 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 15 Oct 2015 16:51:00 +0000
Subject: [PATCH] Revert 250343 and 250344

Turns out this approach is buggy.  In discussion about follow on work, Sanjoy pointed out that we could be subject to circular logic problems.

Consider:
 if (i u< L) leave()
 if ((i + 1) u< L) leave()
 print(a[i] + a[i+1])

If we know that L is less than UINT_MAX, we could possible prove (in a control dependent way) that i + 1 does not overflow.  This gives us:
 if (i u< L) leave()
 if ((i +nuw 1) u< L) leave()
 print(a[i] + a[i+1])

If we now do the transform this patch proposed, we end up with:
 if ((i +nuw 1) u< L) leave_appropriately()
 print(a[i] + a[i+1])

That would be a miscompile when i==-1.  The problem here is that the control dependent nuw bits got used to prove something about the first condition.  That's obviously invalid.

This won't happen today, but since I plan to enhance LVI/CVP with exactly that transform at some point in the not too distant future...




git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250430 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/SimplifyCFG.cpp          | 117 ------------------
 .../SimplifyCFG/fast-fallthrough.ll           |  86 -------------
 2 files changed, 203 deletions(-)
 delete mode 100644 test/Transforms/SimplifyCFG/fast-fallthrough.ll
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index dc499ce78b8..e98f3349eff 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -73,17 +73,6 @@ static cl::opt<bool> HoistCondStores(
     "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
     cl::desc("Hoist conditional stores if an unconditional store precedes"));
 
-static cl::opt<unsigned> SpeculativeFlattenBias(
-    "speculative-flatten-bias", cl::Hidden, cl::init(100),
-    cl::desc("Control how biased a branch needs to be to be considered rarely"
-             " failing for speculative flattening (default = 100)"));
-
-static cl::opt<unsigned> SpeculativeFlattenThreshold(
-    "speculative-flatten-threshold", cl::Hidden, cl::init(10),
-    cl::desc("Control how much speculation happens due to speculative"
-             " flattening (default = 10)"));
-
-
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping");
 STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
@@ -2343,109 +2332,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
   return false;
 }
 
-
-/// Return true if B is known to be implied by A.  A & B must be i1 (boolean)
-static bool implies(Value *A, Value *B, const DataLayout &DL) {
-  assert(A->getType()->isIntegerTy(1) && B->getType()->isIntegerTy(1));
-  // Note that the truth table for implication is the same as <=u on i1
-  // values. 
-  Value *Simplified = SimplifyICmpInst(ICmpInst::ICMP_ULE, A, B, DL);
-  Constant *Con = dyn_cast_or_null<Constant>(Simplified);
-  return Con && Con->isOneValue();
-}
-
-/// If we have a series of tests leading to a frequently executed fallthrough
-/// path and we can test all the conditions at once, we can execute a single
-/// test on the fast path and figure out which condition failed on the slow
-/// path.  This transformation considers a pair of branches at a time since
-/// recursively considering branches pairwise will cause an entire chain to
-/// collapse.  This transformation is code size neutral, but makes it
-/// dynamically more expensive to fail either check.  As such, we only want to
-/// do this if both checks are expected to essentially never fail.
-/// The key motivating examples are cases like:
-///   br (i < Length), in_bounds, fail1
-/// in_bounds:
-///   br (i+1 < Length), in_bounds2, fail2
-/// in_bounds2:
-///   ...
-///
-/// We can rewrite this as:
-///   br (i+1 < Length), in_bounds2, dispatch
-/// in_bounds2:
-///   ...
-/// dispatch:
-///   br (i < Length), fail2, fail1
-///
-/// TODO: we could consider duplicating some (non-speculatable) instructions
-/// from BI->getParent() down both paths
-static bool SpeculativelyFlattenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
-                                                       const DataLayout &DL) {
-  auto *PredBB = PBI->getParent();
-  auto *BB = BI->getParent();
-
-  /// Is the failing path of this branch taken rarely if at all?
-  auto isRarelyUntaken = [](BranchInst *BI) {
-    uint64_t ProbTrue;
-    uint64_t ProbFalse;
-    if (!ExtractBranchMetadata(BI, ProbTrue, ProbFalse))
-      return false;
-    return ProbTrue > ProbFalse * SpeculativeFlattenBias;
-  };
-
-  if (PBI->getSuccessor(0) != BB ||
-      !isRarelyUntaken(PBI) || !isRarelyUntaken(BI) ||
-      !implies(BI->getCondition(), PBI->getCondition(), DL))
-    return false;
-
-  // TODO: The following code performs a similiar, but slightly distinct
-  // transformation to that done by SpeculativelyExecuteBB.  We should consider
-  // combining them at some point.
-
-  // Can we speculate everything in the given block (except for the terminator
-  // instruction) at the instruction boundary before 'At'?
-  unsigned SpeculationCost = 0;
-  for (Instruction &I : *BB) {
-    if (isa<TerminatorInst>(I)) break;
-    if (!isSafeToSpeculativelyExecute(&I, PBI))
-      return false;
-    SpeculationCost++;
-    // Only flatten relatively small BBs to avoid making the bad case terrible
-    if (SpeculationCost > SpeculativeFlattenThreshold || isa<CallInst>(I))
-      return false;
-  }
-
-  DEBUG(dbgs() << "Outlining slow path comparison: "
-        << *PBI->getCondition() << " implied by "
-        << *BI->getCondition() << "\n");
-  // See the example in the function comment.
-  Value *WhichCond = PBI->getCondition();
-  auto *Success = BI->getSuccessor(0);
-  auto *FailPBI = PBI->getSuccessor(1);
-  auto *FailBI =  BI->getSuccessor(1);
-  // Have PBI branch directly to the fast path using BI's condition, branch
-  // to this BI's block for the slow path dispatch
-  PBI->setSuccessor(0, Success);
-  PBI->setSuccessor(1, BB);
-  PBI->setCondition(BI->getCondition());
-  // Rewrite BI to distinguish between the two failing cases
-  BI->setSuccessor(0, FailBI);
-  BI->setSuccessor(1, FailPBI);
-  BI->setCondition(WhichCond);
-  // Move all of the instructions from BI->getParent other than
-  // the terminator into the fastpath.  This requires speculating them past
-  // the original PBI branch, but we checked for that legality above.
-  // TODO: This doesn't handle dependent loads.  We could duplicate those
-  // down both paths, but that involves further code growth.  We need to
-  // figure out a good cost model here.
-  PredBB->getInstList().splice(PBI, BB->getInstList(),
-                               BB->begin(), std::prev(BB->end()));
-
-  // To be conservatively correct, drop all metadata on the rewritten
-  // branches.  TODO: update metadata
-  PBI->dropUnknownNonDebugMetadata();
-  BI->dropUnknownNonDebugMetadata();
-  return true;
-}
 /// If we have a conditional branch as a predecessor of another block,
 /// this function tries to simplify it.  We know
 /// that PBI and BI are both conditional branches, and BI is in one of the
@@ -2504,9 +2390,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
     if (CE->canTrap())
       return false;
 
-  if (SpeculativelyFlattenCondBranchToCondBranch(PBI, BI, DL))
-    return true;
-
   // If this is a conditional branch in an empty block, and if any
   // predecessors are a conditional branch to one of our destinations,
   // fold the conditions into logical ops and one cond br.
diff --git a/test/Transforms/SimplifyCFG/fast-fallthrough.ll b/test/Transforms/SimplifyCFG/fast-fallthrough.ll
deleted file mode 100644
index 7c5dbe2b0ad..00000000000
--- a/test/Transforms/SimplifyCFG/fast-fallthrough.ll
+++ /dev/null
@@ -1,86 +0,0 @@
-; RUN: opt -S %s -simplifycfg | FileCheck %s
-
-define void @test(i32 %length.i, i32 %i) {
-; CHECK-LABEL: @test
-  %iplus1 = add nsw i32 %i, 1
-  %var29 = icmp slt i32 %i, %length.i
-  %var30 = icmp slt i32 %iplus1, %length.i
-; CHECK: br i1 %var30, label %in_bounds, label %next
-  br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0}
-
-next:
-; CHECK-LABEL: next:
-; CHECK: br i1 %var29, label %out_of_bounds2, label %out_of_bounds
-  br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0}
-
-in_bounds:
-  ret void
-
-out_of_bounds:
-  call void @foo(i64 0)
-  unreachable
-
-out_of_bounds2:
-  call void @foo(i64 1)
-  unreachable
-}
-
-define void @test2(i32 %length.i, i32 %i) {
-; CHECK-LABEL: @test2
-  %var29 = icmp slt i32 %i, %length.i
-; CHECK: br i1 %var30, label %in_bounds, label %next
-  br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0}
-
-next:
-; CHECK-LABEL: next:
-; CHECK: br i1 %var29, label %out_of_bounds2, label %out_of_bounds
-  %iplus1 = add nsw i32 %i, 1
-  %var30 = icmp slt i32 %iplus1, %length.i
-  br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0}
-
-in_bounds:
-  ret void
-
-out_of_bounds:
-  call void @foo(i64 0)
-  unreachable
-
-out_of_bounds2:
-  call void @foo(i64 1)
-  unreachable
-}
-
-; As written, this one can't trigger today.  It would require us to duplicate
-; the %val1 load down two paths and that's not implemented yet.
-define i64 @test3(i32 %length.i, i32 %i, i64* %base) {
-; CHECK-LABEL: @test3
-  %var29 = icmp slt i32 %i, %length.i
-; CHECK: br i1 %var29, label %next, label %out_of_bounds
-  br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0}
-
-next:
-; CHECK-LABEL: next:
-  %addr1 = getelementptr i64, i64* %base, i32 %i
-  %val1 = load i64, i64* %addr1
-  %iplus1 = add nsw i32 %i, 1
-  %var30 = icmp slt i32 %iplus1, %length.i
-; CHECK: br i1 %var30, label %in_bounds, label %out_of_bounds2
-  br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0}
-
-in_bounds:
-  %addr2 = getelementptr i64, i64* %base, i32 %iplus1
-  %val2 = load i64, i64* %addr2
-  %res = sub i64 %val1, %val2
-  ret i64 %res
-
-out_of_bounds:
-  call void @foo(i64 0)
-  unreachable
-
-out_of_bounds2:
-  call void @foo(i64 %val1)
-  unreachable
-}
-
-declare void @foo(i64)
-
-- 
2.34.1