From: Silviu Baranga Date: Fri, 8 Jan 2016 11:11:04 +0000 (+0000) Subject: Re-commit r257064, this time with a fixed assert X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=2bb04c9abe5450398eec96919cb0bf529b00e7be;p=oota-llvm.git Re-commit r257064, this time with a fixed assert In setInsertionPoint if the value is not a PHI, Instruction or Argument it should be a Constant, not a ConstantExpr. Original commit message: [InstCombine] Look through PHIs, GEPs, IntToPtrs and PtrToInts to expose more constants when comparing GEPs Summary: When comparing two GEP instructions which have the same base pointer and one of them has a constant index, it is possible to only compare indices, transforming it to a compare with a constant. This removes one use for the GEP instruction with the constant index, can reduce register pressure and can sometimes lead to removing the comparisson entirely. InstCombine was already doing this when comparing two GEPs if the base pointers were the same. However, in the case where we have complex pointer arithmetic (GEPs applied to GEPs, PHIs of GEPs, conversions to or from integers, etc) the value of the original base pointer will be hidden to the optimizer and this transformation will be disabled. This change detects when the two sides of the comparison can be expressed as GEPs with the same base pointer, even if they don't appear as such in the IR. The transformation will convert all the pointer arithmetic to arithmetic done on indices and all the relevant uses of GEPs to GEPs with a common base pointer. The GEP comparison will be converted to a comparison done on indices. Reviewers: majnemer, jmolloy Subscribers: hfinkel, jevinskie, jmolloy, aadg, llvm-commits Differential Revision: http://reviews.llvm.org/D15146 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257164 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index c0786afe965..1e977e13530 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -13,6 +13,7 @@ #include "InstCombineInternal.h" #include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -595,6 +596,320 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); } +/// Returns true if we can rewrite Start as a GEP with pointer Base +/// and some integer offset. The nodes that need to be re-written +/// for this transformation will be added to Explored. +static bool canRewriteGEPAsOffset(Value *Start, Value *Base, + const DataLayout &DL, + SetVector &Explored) { + SmallVector WorkList(1, Start); + Explored.insert(Base); + + // The following traversal gives us an order which can be used + // when doing the final transformation. Since in the final + // transformation we create the PHI replacement instructions first, + // we don't have to get them in any particular order. + // + // However, for other instructions we will have to traverse the + // operands of an instruction first, which means that we have to + // do a post-order traversal. + while (!WorkList.empty()) { + SetVector PHIs; + + while (!WorkList.empty()) { + if (Explored.size() >= 100) + return false; + + Value *V = WorkList.back(); + + if (Explored.count(V) != 0) { + WorkList.pop_back(); + continue; + } + + if (!isa(V) && !isa(V) && + !isa(V) && !isa(V)) + // We've found some value that we can't explore which is different from + // the base. Therefore we can't do this transformation. + return false; + + if (isa(V) || isa(V)) { + auto *CI = dyn_cast(V); + if (!CI->isNoopCast(DL)) + return false; + + if (Explored.count(CI->getOperand(0)) == 0) + WorkList.push_back(CI->getOperand(0)); + } + + if (auto *GEP = dyn_cast(V)) { + // We're limiting the GEP to having one index. This will preserve + // the original pointer type. We could handle more cases in the + // future. + if (GEP->getNumIndices() != 1 || !GEP->isInBounds()) + return false; + + if (Explored.count(GEP->getOperand(0)) == 0) + WorkList.push_back(GEP->getOperand(0)); + } + + if (WorkList.back() == V) { + WorkList.pop_back(); + // We've finished visiting this node, mark it as such. + Explored.insert(V); + } + + if (auto *PN = dyn_cast(V)) { + Explored.insert(PN); + PHIs.insert(PN); + } + } + + // Explore the PHI nodes further. + for (auto *PN : PHIs) + for (Value *Op : PN->incoming_values()) + if (Explored.count(Op) == 0) + WorkList.push_back(Op); + } + + // Make sure that we can do this. Since we can't insert GEPs in a basic + // block before a PHI node, we can't easily do this transformation if + // we have PHI node users of transformed instructions. + for (Value *Val : Explored) { + for (Value *Use : Val->uses()) { + + auto *PHI = dyn_cast(Use); + auto *Inst = dyn_cast(Val); + + if (Inst == Base || Inst == PHI || !Inst || !PHI || + Explored.count(PHI) == 0) + continue; + + if (PHI->getParent() == Inst->getParent()) + return false; + } + } + return true; +} + +// Sets the appropriate insert point on Builder where we can add +// a replacement Instruction for V (if that is possible). +static void setInsertionPoint(IRBuilder<> &Builder, Value *V, + bool Before = true) { + if (auto *PHI = dyn_cast(V)) { + Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt()); + return; + } + if (auto *I = dyn_cast(V)) { + if (!Before) + I = &*std::next(I->getIterator()); + Builder.SetInsertPoint(I); + return; + } + if (auto *A = dyn_cast(V)) { + // Set the insertion point in the entry block. + BasicBlock &Entry = A->getParent()->getEntryBlock(); + Builder.SetInsertPoint(&*Entry.getFirstInsertionPt()); + return; + } + // Otherwise, this is a constant and we don't need to set a new + // insertion point. + assert(isa(V) && "Setting insertion point for unknown value!"); +} + +/// Returns a re-written value of Start as an indexed GEP using Base as a +/// pointer. +static Value *rewriteGEPAsOffset(Value *Start, Value *Base, + const DataLayout &DL, + SetVector &Explored) { + // Perform all the substitutions. This is a bit tricky because we can + // have cycles in our use-def chains. + // 1. Create the PHI nodes without any incoming values. + // 2. Create all the other values. + // 3. Add the edges for the PHI nodes. + // 4. Emit GEPs to get the original pointers. + // 5. Remove the original instructions. + Type *IndexType = IntegerType::get( + Base->getContext(), DL.getPointerTypeSizeInBits(Start->getType())); + + DenseMap NewInsts; + NewInsts[Base] = ConstantInt::getNullValue(IndexType); + + // Create the new PHI nodes, without adding any incoming values. + for (Value *Val : Explored) { + if (Val == Base) + continue; + // Create empty phi nodes. This avoids cyclic dependencies when creating + // the remaining instructions. + if (auto *PHI = dyn_cast(Val)) + NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(), + PHI->getName() + ".idx", PHI); + } + IRBuilder<> Builder(Base->getContext()); + + // Create all the other instructions. + for (Value *Val : Explored) { + + if (NewInsts.find(Val) != NewInsts.end()) + continue; + + if (auto *CI = dyn_cast(Val)) { + NewInsts[CI] = NewInsts[CI->getOperand(0)]; + continue; + } + if (auto *GEP = dyn_cast(Val)) { + Value *Index = NewInsts[GEP->getOperand(1)] + ? NewInsts[GEP->getOperand(1)] + : GEP->getOperand(1); + setInsertionPoint(Builder, GEP); + // Indices might need to be sign extended. GEPs will magically do + // this, but we need to do it ourselves here. + if (Index->getType()->getScalarSizeInBits() != + NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) { + Index = Builder.CreateSExtOrTrunc( + Index, NewInsts[GEP->getOperand(0)]->getType(), + GEP->getOperand(0)->getName() + ".sext"); + } + NewInsts[GEP] = + Builder.CreateAdd(NewInsts[GEP->getOperand(0)], Index, + GEP->getOperand(0)->getName() + ".add"); + continue; + + } + if (isa(Val)) + continue; + + llvm_unreachable("Unexpected instruction type"); + } + + // Add the incoming values to the PHI nodes. + for (Value *Val : Explored) { + if (Val == Base) + continue; + // All the instructions have been created, we can now add edges to the + // phi nodes. + if (auto *PHI = dyn_cast(Val)) { + PHINode *NewPhi = static_cast(NewInsts[PHI]); + for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) { + Value *NewIncoming = PHI->getIncomingValue(I); + + if (NewInsts.find(NewIncoming) != NewInsts.end()) + NewIncoming = NewInsts[NewIncoming]; + + NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I)); + } + } + } + + // If required, create a inttoptr instruction. + Value *NewBase = Base; + setInsertionPoint(Builder, Base, false); + if (!Base->getType()->isPointerTy()) + NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(), + Start->getName() + "to.ptr"); + + for (Value *Val : Explored) { + if (Val == Base) + continue; + + // Depending on the type, for external users we have to emit + // a GEP or a GEP + ptrtoint. + if (isa(NewInsts[Val])) + setInsertionPoint(Builder, NewInsts[Val], false); + else + setInsertionPoint(Builder, NewBase, false); + + Value *GEP = + Builder.CreateInBoundsGEP(Start->getType()->getPointerElementType(), + NewBase, {NewInsts[Val]}, + Val->getName() + ".ptr"); + + if (!Val->getType()->isPointerTy()) { + Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(), + Val->getName() + ".conv"); + GEP = Cast; + } + Val->replaceAllUsesWith(GEP); + } + + return NewInsts[Start]; +} + +/// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express +/// the input Value as a GEP constant indexed GEP. Returns a pair containing +/// the GEPs Pointer and Index. +static std::pair +getAsConstantIndexedAddress(Value *V, const DataLayout &DL) { + Type *IndexType = + IntegerType::get(V->getContext(), + DL.getPointerTypeSizeInBits(V->getType())); + + Constant *Index = ConstantInt::getNullValue(IndexType); + while (true) { + if (GEPOperator *GEP = dyn_cast(V)) { + // We accept only inbouds GEPs here to exclude the possibility of + // overflow. + if (!GEP->isInBounds()) + break; + if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1) { + V = GEP->getOperand(0); + Constant *GEPIndex = static_cast(GEP->getOperand(1)); + Index = ConstantExpr::getAdd( + Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType)); + continue; + } + break; + } + if (auto *CI = dyn_cast(V)) { + if (!CI->isNoopCast(DL)) + break; + V = CI->getOperand(0); + continue; + } + if (auto *CI = dyn_cast(V)) { + if (!CI->isNoopCast(DL)) + break; + V = CI->getOperand(0); + continue; + } + break; + } + return {V, Index}; +} + +// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant. +// We can look through PHIs, GEPs and casts in order to determine a +// common base between GEPLHS and RHS. +static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS, + ICmpInst::Predicate Cond, + const DataLayout &DL) { + if (!GEPLHS->hasAllConstantIndices()) + return nullptr; + + Value *PtrBase, *Index; + std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL); + + // The set of nodes that will take part in this transformation. + SetVector Nodes; + + if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes)) + return nullptr; + + // We know we can re-write this as + // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) + // Since we've only looked through inbouds GEPs we know that we + // can't have overflow on either side. We can therefore re-write + // this as: + // OFFSET1 cmp OFFSET2 + Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes); + + // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written + // GEP having PtrBase as the pointer base, and has returned in NewRHS the + // offset. Since Index is the offset of LHS to the base pointer, we will now + // compare the offsets instead of comparing the pointers. + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS); +} + /// FoldGEPICmp - Fold comparisons between a GEP instruction and something /// else. At this point we know that the GEP is on the LHS of the comparison. Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, @@ -674,8 +989,9 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, } // Otherwise, the base pointers are different and the indices are - // different, bail out. - return nullptr; + // different. Try convert this to an indexed compare by looking through + // PHIs/casts. + return transformToIndexedCompare(GEPLHS, RHS, Cond, DL); } // If one of the GEPs has all zero indices, recurse. @@ -727,7 +1043,10 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); } } - return nullptr; + + // Try convert this to an indexed compare by looking through PHIs/casts as a + // last resort. + return transformToIndexedCompare(GEPLHS, RHS, Cond, DL); } Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca, diff --git a/test/Transforms/InstCombine/indexed-gep-compares.ll b/test/Transforms/InstCombine/indexed-gep-compares.ll new file mode 100644 index 00000000000..fab0d309e4a --- /dev/null +++ b/test/Transforms/InstCombine/indexed-gep-compares.ll @@ -0,0 +1,100 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" + +define i32 *@test1(i32* %A, i32 %Offset) { +entry: + %tmp = getelementptr inbounds i32, i32* %A, i32 %Offset + br label %bb + +bb: + %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ] + %LHS = getelementptr inbounds i32, i32* %A, i32 100 + %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1 + %cond = icmp ult i32 * %LHS, %RHS + br i1 %cond, label %bb2, label %bb + +bb2: + ret i32* %RHS + +; CHECK-LABEL: @test1( +; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %entry ] +; CHECK: %[[ADD]] = add i32 %[[INDEX]], 1 +; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100 +; CHECK: br i1 %cond, label %bb2, label %bb +; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %A, i32 %[[INDEX]] +; CHECK: ret i32* %[[PTR]] +} + +define i32 *@test2(i32 %A, i32 %Offset) { +entry: + %A.ptr = inttoptr i32 %A to i32* + %tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset + br label %bb + +bb: + %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ] + %LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100 + %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1 + %cmp0 = ptrtoint i32 *%LHS to i32 + %cmp1 = ptrtoint i32 *%RHS to i32 + %cond = icmp ult i32 %cmp0, %cmp1 + br i1 %cond, label %bb2, label %bb + +bb2: + ret i32* %RHS + +; CHECK-LABEL: @test2( +; CHECK: %[[TOPTR:[0-9A-Za-z.]+]] = inttoptr i32 %[[ADD:[0-9A-Za-z.]+]] to i32* +; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %entry ] +; CHECK: %[[ADD]] = add i32 %[[INDEX]], 1 +; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100 +; CHECK: br i1 %cond, label %bb2, label %bb +; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %[[TOPTR]], i32 %[[INDEX]] +; CHECK: ret i32* %[[PTR]] +} + +; Perform the transformation only if we know that the GEPs used are inbounds. +define i32 *@test3(i32* %A, i32 %Offset) { +entry: + %tmp = getelementptr i32, i32* %A, i32 %Offset + br label %bb + +bb: + %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ] + %LHS = getelementptr i32, i32* %A, i32 100 + %RHS.next = getelementptr i32, i32* %RHS, i64 1 + %cond = icmp ult i32 * %LHS, %RHS + br i1 %cond, label %bb2, label %bb + +bb2: + ret i32* %RHS + +; CHECK-LABEL: @test3( +; CHECK-NOT: %cond = icmp sgt i32 %{{[0-9A-Za-z.]+}}, 100 +} + +; An inttoptr that requires an extension or truncation will be opaque when determining +; the base pointer. In this case we can still perform the transformation by considering +; A.ptr as being the base pointer. +define i32 *@test4(i16 %A, i32 %Offset) { +entry: + %A.ptr = inttoptr i16 %A to i32* + %tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset + br label %bb + +bb: + %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ] + %LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100 + %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1 + %cmp0 = ptrtoint i32 *%LHS to i32 + %cmp1 = ptrtoint i32 *%RHS to i32 + %cond = icmp ult i32 %cmp0, %cmp1 + br i1 %cond, label %bb2, label %bb + +bb2: + ret i32* %RHS + +; CHECK-LABEL: @test4( +; CHECK: %cond = icmp sgt i32 %{{[0-9A-Za-z.]+}}, 100 +}