From 1ad3dfdf95a5f4ada4f178ce6cb81d5ae3fb42d3 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 15 Oct 2015 16:51:00 +0000 Subject: [PATCH] Revert 250343 and 250344 Turns out this approach is buggy. In discussion about follow on work, Sanjoy pointed out that we could be subject to circular logic problems. Consider: if (i u< L) leave() if ((i + 1) u< L) leave() print(a[i] + a[i+1]) If we know that L is less than UINT_MAX, we could possible prove (in a control dependent way) that i + 1 does not overflow. This gives us: if (i u< L) leave() if ((i +nuw 1) u< L) leave() print(a[i] + a[i+1]) If we now do the transform this patch proposed, we end up with: if ((i +nuw 1) u< L) leave_appropriately() print(a[i] + a[i+1]) That would be a miscompile when i==-1. The problem here is that the control dependent nuw bits got used to prove something about the first condition. That's obviously invalid. This won't happen today, but since I plan to enhance LVI/CVP with exactly that transform at some point in the not too distant future... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250430 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 117 ------------------ .../SimplifyCFG/fast-fallthrough.ll | 86 ------------- 2 files changed, 203 deletions(-) delete mode 100644 test/Transforms/SimplifyCFG/fast-fallthrough.ll diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index dc499ce78b8..e98f3349eff 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -73,17 +73,6 @@ static cl::opt HoistCondStores( "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes")); -static cl::opt SpeculativeFlattenBias( - "speculative-flatten-bias", cl::Hidden, cl::init(100), - cl::desc("Control how biased a branch needs to be to be considered rarely" - " failing for speculative flattening (default = 100)")); - -static cl::opt SpeculativeFlattenThreshold( - "speculative-flatten-threshold", cl::Hidden, cl::init(10), - cl::desc("Control how much speculation happens due to speculative" - " flattening (default = 10)")); - - STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); @@ -2343,109 +2332,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { return false; } - -/// Return true if B is known to be implied by A. A & B must be i1 (boolean) -static bool implies(Value *A, Value *B, const DataLayout &DL) { - assert(A->getType()->isIntegerTy(1) && B->getType()->isIntegerTy(1)); - // Note that the truth table for implication is the same as <=u on i1 - // values. - Value *Simplified = SimplifyICmpInst(ICmpInst::ICMP_ULE, A, B, DL); - Constant *Con = dyn_cast_or_null(Simplified); - return Con && Con->isOneValue(); -} - -/// If we have a series of tests leading to a frequently executed fallthrough -/// path and we can test all the conditions at once, we can execute a single -/// test on the fast path and figure out which condition failed on the slow -/// path. This transformation considers a pair of branches at a time since -/// recursively considering branches pairwise will cause an entire chain to -/// collapse. This transformation is code size neutral, but makes it -/// dynamically more expensive to fail either check. As such, we only want to -/// do this if both checks are expected to essentially never fail. -/// The key motivating examples are cases like: -/// br (i < Length), in_bounds, fail1 -/// in_bounds: -/// br (i+1 < Length), in_bounds2, fail2 -/// in_bounds2: -/// ... -/// -/// We can rewrite this as: -/// br (i+1 < Length), in_bounds2, dispatch -/// in_bounds2: -/// ... -/// dispatch: -/// br (i < Length), fail2, fail1 -/// -/// TODO: we could consider duplicating some (non-speculatable) instructions -/// from BI->getParent() down both paths -static bool SpeculativelyFlattenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, - const DataLayout &DL) { - auto *PredBB = PBI->getParent(); - auto *BB = BI->getParent(); - - /// Is the failing path of this branch taken rarely if at all? - auto isRarelyUntaken = [](BranchInst *BI) { - uint64_t ProbTrue; - uint64_t ProbFalse; - if (!ExtractBranchMetadata(BI, ProbTrue, ProbFalse)) - return false; - return ProbTrue > ProbFalse * SpeculativeFlattenBias; - }; - - if (PBI->getSuccessor(0) != BB || - !isRarelyUntaken(PBI) || !isRarelyUntaken(BI) || - !implies(BI->getCondition(), PBI->getCondition(), DL)) - return false; - - // TODO: The following code performs a similiar, but slightly distinct - // transformation to that done by SpeculativelyExecuteBB. We should consider - // combining them at some point. - - // Can we speculate everything in the given block (except for the terminator - // instruction) at the instruction boundary before 'At'? - unsigned SpeculationCost = 0; - for (Instruction &I : *BB) { - if (isa(I)) break; - if (!isSafeToSpeculativelyExecute(&I, PBI)) - return false; - SpeculationCost++; - // Only flatten relatively small BBs to avoid making the bad case terrible - if (SpeculationCost > SpeculativeFlattenThreshold || isa(I)) - return false; - } - - DEBUG(dbgs() << "Outlining slow path comparison: " - << *PBI->getCondition() << " implied by " - << *BI->getCondition() << "\n"); - // See the example in the function comment. - Value *WhichCond = PBI->getCondition(); - auto *Success = BI->getSuccessor(0); - auto *FailPBI = PBI->getSuccessor(1); - auto *FailBI = BI->getSuccessor(1); - // Have PBI branch directly to the fast path using BI's condition, branch - // to this BI's block for the slow path dispatch - PBI->setSuccessor(0, Success); - PBI->setSuccessor(1, BB); - PBI->setCondition(BI->getCondition()); - // Rewrite BI to distinguish between the two failing cases - BI->setSuccessor(0, FailBI); - BI->setSuccessor(1, FailPBI); - BI->setCondition(WhichCond); - // Move all of the instructions from BI->getParent other than - // the terminator into the fastpath. This requires speculating them past - // the original PBI branch, but we checked for that legality above. - // TODO: This doesn't handle dependent loads. We could duplicate those - // down both paths, but that involves further code growth. We need to - // figure out a good cost model here. - PredBB->getInstList().splice(PBI, BB->getInstList(), - BB->begin(), std::prev(BB->end())); - - // To be conservatively correct, drop all metadata on the rewritten - // branches. TODO: update metadata - PBI->dropUnknownNonDebugMetadata(); - BI->dropUnknownNonDebugMetadata(); - return true; -} /// If we have a conditional branch as a predecessor of another block, /// this function tries to simplify it. We know /// that PBI and BI are both conditional branches, and BI is in one of the @@ -2504,9 +2390,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, if (CE->canTrap()) return false; - if (SpeculativelyFlattenCondBranchToCondBranch(PBI, BI, DL)) - return true; - // If this is a conditional branch in an empty block, and if any // predecessors are a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. diff --git a/test/Transforms/SimplifyCFG/fast-fallthrough.ll b/test/Transforms/SimplifyCFG/fast-fallthrough.ll deleted file mode 100644 index 7c5dbe2b0ad..00000000000 --- a/test/Transforms/SimplifyCFG/fast-fallthrough.ll +++ /dev/null @@ -1,86 +0,0 @@ -; RUN: opt -S %s -simplifycfg | FileCheck %s - -define void @test(i32 %length.i, i32 %i) { -; CHECK-LABEL: @test - %iplus1 = add nsw i32 %i, 1 - %var29 = icmp slt i32 %i, %length.i - %var30 = icmp slt i32 %iplus1, %length.i -; CHECK: br i1 %var30, label %in_bounds, label %next - br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0} - -next: -; CHECK-LABEL: next: -; CHECK: br i1 %var29, label %out_of_bounds2, label %out_of_bounds - br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0} - -in_bounds: - ret void - -out_of_bounds: - call void @foo(i64 0) - unreachable - -out_of_bounds2: - call void @foo(i64 1) - unreachable -} - -define void @test2(i32 %length.i, i32 %i) { -; CHECK-LABEL: @test2 - %var29 = icmp slt i32 %i, %length.i -; CHECK: br i1 %var30, label %in_bounds, label %next - br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0} - -next: -; CHECK-LABEL: next: -; CHECK: br i1 %var29, label %out_of_bounds2, label %out_of_bounds - %iplus1 = add nsw i32 %i, 1 - %var30 = icmp slt i32 %iplus1, %length.i - br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0} - -in_bounds: - ret void - -out_of_bounds: - call void @foo(i64 0) - unreachable - -out_of_bounds2: - call void @foo(i64 1) - unreachable -} - -; As written, this one can't trigger today. It would require us to duplicate -; the %val1 load down two paths and that's not implemented yet. -define i64 @test3(i32 %length.i, i32 %i, i64* %base) { -; CHECK-LABEL: @test3 - %var29 = icmp slt i32 %i, %length.i -; CHECK: br i1 %var29, label %next, label %out_of_bounds - br i1 %var29, label %next, label %out_of_bounds, !prof !{!"branch_weights", i32 1000, i32 0} - -next: -; CHECK-LABEL: next: - %addr1 = getelementptr i64, i64* %base, i32 %i - %val1 = load i64, i64* %addr1 - %iplus1 = add nsw i32 %i, 1 - %var30 = icmp slt i32 %iplus1, %length.i -; CHECK: br i1 %var30, label %in_bounds, label %out_of_bounds2 - br i1 %var30, label %in_bounds, label %out_of_bounds2, !prof !{!"branch_weights", i32 1000, i32 0} - -in_bounds: - %addr2 = getelementptr i64, i64* %base, i32 %iplus1 - %val2 = load i64, i64* %addr2 - %res = sub i64 %val1, %val2 - ret i64 %res - -out_of_bounds: - call void @foo(i64 0) - unreachable - -out_of_bounds2: - call void @foo(i64 %val1) - unreachable -} - -declare void @foo(i64) - -- 2.34.1