From 70c52f7d6cb88b275d7ef235745dc61d292e8abe Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 7 Oct 2015 00:20:07 +0000 Subject: [PATCH] InstCombine: Fold comparisons between unguessable allocas and other pointers This will allow us to optimize code such as: int f(int *p) { int x; return p == &x; } as well as: int *allocate(void); int f() { int x; int *p = allocate(); return p == &x; } The folding can only be done under certain circumstances. Even though p and &x cannot alias, the comparison must still return true if the pointer representations are equal. If a user successfully generates a p that's a correct guess for &x, comparison should return true even though p is an invalid pointer. This patch argues that if the address of the alloca isn't observable outside the function, the function can act as-if the address is impossible to guess from the outside. The tricky part is keeping the act consistent: if we fold p == &x to false in one place, we must make sure to fold any other comparisons based on those pointers similarly. To ensure that, we only fold when &x is involved exactly once in comparison instructions. 
Differential Revision: http://reviews.llvm.org/D13358 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249490 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineCompares.cpp | 88 +++++++++++++++++ .../InstCombine/InstCombineInternal.h | 1 + test/Transforms/InstCombine/compare-alloca.ll | 97 +++++++++++++++++++ 3 files changed, 186 insertions(+) create mode 100644 test/Transforms/InstCombine/compare-alloca.ll diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 3afb5ed6d8a..a333c3e18e5 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -730,6 +730,83 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return nullptr; } +Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca, + Value *Other) { + assert(ICI.isEquality() && "Cannot fold non-equality comparison."); + + // It would be tempting to fold away comparisons between allocas and any + // pointer not based on that alloca (e.g. an argument). However, even + // though such pointers cannot alias, they can still compare equal. + // + // But LLVM doesn't specify where allocas get their memory, so if the alloca + // doesn't escape we can argue that it's impossible to guess its value, and we + // can therefore act as if any such guesses are wrong. + // + // The code below checks that the alloca doesn't escape, and that it's only + // used in a comparison once (the current instruction). The + // single-comparison-use condition ensures that we're trivially folding all + // comparisons against the alloca consistently, and avoids the risk of + // erroneously folding a comparison of the pointer with itself. + + unsigned MaxIter = 32; // Break cycles and bound to constant-time. 
+ + SmallVector<Use *, 32> Worklist; + for (Use &U : Alloca->uses()) { + if (Worklist.size() >= MaxIter) + return nullptr; + Worklist.push_back(&U); + } + + unsigned NumCmps = 0; + while (!Worklist.empty()) { + assert(Worklist.size() <= MaxIter); + Use *U = Worklist.pop_back_val(); + Value *V = U->getUser(); + --MaxIter; + + if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) || + isa<SelectInst>(V)) { + // Track the uses. + } else if (isa<LoadInst>(V)) { + // Loading from the pointer doesn't escape it. + continue; + } else if (auto *SI = dyn_cast<StoreInst>(V)) { + // Storing *to* the pointer is fine, but storing the pointer escapes it. + if (SI->getValueOperand() == U->get()) + return nullptr; + continue; + } else if (isa<ICmpInst>(V)) { + if (NumCmps++) + return nullptr; // Found more than one cmp. + continue; + } else if (auto *Intrin = dyn_cast<IntrinsicInst>(V)) { + switch (Intrin->getIntrinsicID()) { + // These intrinsics don't escape or compare the pointer. Memset is safe + // because we don't allow ptrtoint. Memcpy and memmove are safe because + // we don't allow stores, so src cannot point to V. + case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: + continue; + default: + return nullptr; + } + } else { + return nullptr; + } + for (Use &U : V->uses()) { + if (Worklist.size() >= MaxIter) + return nullptr; + Worklist.push_back(&U); + } + } + + Type *CmpTy = CmpInst::makeCmpResultType(Other->getType()); + return ReplaceInstUsesWith( + ICI, + ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate()))); +} + /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI, @@ -3211,6 +3288,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { ICmpInst::getSwappedPredicate(I.getPredicate()), I)) return NI; + // Try to optimize equality comparisons against alloca-based pointers. 
+ if (Op0->getType()->isPointerTy() && I.isEquality()) { + assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?"); + if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL))) + if (Instruction *New = FoldAllocaCmp(I, Alloca, Op1)) + return New; + if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL))) + if (Instruction *New = FoldAllocaCmp(I, Alloca, Op0)) + return New; + } + // Test to see if the operands of the icmp are casted versions of other // values. If the ptr->ptr cast can be stripped off both arguments, we do so // now. diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 9e58c7428bd..79cb5f25dc6 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -281,6 +281,7 @@ public: ICmpInst::Predicate Pred); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); + Instruction *FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca, Value *Other); Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I); Instruction *commonCastTransforms(CastInst &CI); diff --git a/test/Transforms/InstCombine/compare-alloca.ll b/test/Transforms/InstCombine/compare-alloca.ll new file mode 100644 index 00000000000..ca24da19177 --- /dev/null +++ b/test/Transforms/InstCombine/compare-alloca.ll @@ -0,0 +1,97 @@ +; RUN: opt -instcombine -S %s | FileCheck %s +target datalayout = "p:32:32" + + +define i1 @alloca_argument_compare(i64* %arg) { + %alloc = alloca i64 + %cmp = icmp eq i64* %arg, %alloc + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare + ; CHECK: ret i1 false +} + +define i1 @alloca_argument_compare_swapped(i64* %arg) { + %alloc = alloca i64 + %cmp = icmp eq i64* %alloc, %arg + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare_swapped + ; CHECK: ret i1 false +} + +define i1 @alloca_argument_compare_ne(i64* %arg) { + %alloc = alloca i64 + %cmp = icmp ne 
i64* %arg, %alloc + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare_ne + ; CHECK: ret i1 true +} + +define i1 @alloca_argument_compare_derived_ptrs(i64* %arg, i64 %x) { + %alloc = alloca i64, i64 8 + %p = getelementptr i64, i64* %arg, i64 %x + %q = getelementptr i64, i64* %alloc, i64 3 + %cmp = icmp eq i64* %p, %q + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare_derived_ptrs + ; CHECK: ret i1 false +} + +declare void @escape(i64*) +define i1 @alloca_argument_compare_escaped_alloca(i64* %arg) { + %alloc = alloca i64 + call void @escape(i64* %alloc) + %cmp = icmp eq i64* %alloc, %arg + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare_escaped_alloca + ; CHECK: %cmp = icmp eq i64* %alloc, %arg + ; CHECK: ret i1 %cmp +} + +declare void @check_compares(i1, i1) +define void @alloca_argument_compare_two_compares(i64* %p) { + %q = alloca i64, i64 8 + %r = getelementptr i64, i64* %p, i64 1 + %s = getelementptr i64, i64* %q, i64 2 + %cmp1 = icmp eq i64* %p, %q + %cmp2 = icmp eq i64* %r, %s + call void @check_compares(i1 %cmp1, i1 %cmp2) + ret void + ; We will only fold if there is a single cmp. 
+ ; CHECK-LABEL: alloca_argument_compare_two_compares + ; CHECK: call void @check_compares(i1 %cmp1, i1 %cmp2) +} + +define i1 @alloca_argument_compare_escaped_through_store(i64* %arg, i64** %ptr) { + %alloc = alloca i64 + %cmp = icmp eq i64* %alloc, %arg + %p = getelementptr i64, i64* %alloc, i64 1 + store i64* %p, i64** %ptr + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare_escaped_through_store + ; CHECK: %cmp = icmp eq i64* %alloc, %arg + ; CHECK: ret i1 %cmp +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) +declare void @llvm.lifetime.end(i64, i8* nocapture) +define i1 @alloca_argument_compare_benign_instrs(i8* %arg) { + %alloc = alloca i8 + call void @llvm.lifetime.start(i64 1, i8* %alloc) + %cmp = icmp eq i8* %arg, %alloc + %x = load i8, i8* %arg + store i8 %x, i8* %alloc + call void @llvm.lifetime.end(i64 1, i8* %alloc) + ret i1 %cmp + ; CHECK-LABEL: alloca_argument_compare_benign_instrs + ; CHECK: ret i1 false +} + +declare i64* @allocator() +define i1 @alloca_call_compare() { + %p = alloca i64 + %q = call i64* @allocator() + %cmp = icmp eq i64* %p, %q + ret i1 %cmp + ; CHECK-LABEL: alloca_call_compare + ; CHECK: ret i1 false +} -- 2.34.1