From 7d4d116067f9569c2969a4a3e22f7bb4bde4b7e5 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Tue, 28 Jul 2015 18:22:40 +0000 Subject: [PATCH] [SCEV] Apply NSW and NUW flags via poison value analysis Summary: Make Scalar Evolution able to propagate NSW and NUW flags from instructions to SCEVs in some cases. This is based on reasoning about when poison from instructions with these flags would trigger undefined behavior. This gives a 13% speed-up on some Eigen3-based Google-internal microbenchmarks for NVPTX. There does not seem to be clear agreement about when poison should be considered to propagate through instructions. In this analysis, poison propagates only in cases where that should be uncontroversial. This change makes LSR able to create induction variables for expressions like &ptr[i + offset] for loops like this: for (int i = 0; i < limit; ++i) { sum += ptr[i + offset]; } Here ptr is a 64 bit pointer and offset is a 32 bit integer. For NVPTX, LSR currently creates an induction variable for i + offset instead, which is not as fast. Improving this situation is what brings the 13% speed-up on some Eigen3-based Google-internal microbenchmarks for NVPTX. There are more details in this discussion on llvmdev. June: http://lists.cs.uiuc.edu/pipermail/llvmdev/2015-June/thread.html#87234 July: http://lists.cs.uiuc.edu/pipermail/llvmdev/2015-July/thread.html#87392 Patch by Bjarke Roune Reviewers: eliben, atrick, sanjoy Subscribers: majnemer, hfinkel, jingyue, meheff, llvm-commits Differential Revision: http://reviews.llvm.org/D11212 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243460 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Analysis/ScalarEvolution.h | 8 + include/llvm/Analysis/ValueTracking.h | 49 ++- lib/Analysis/ScalarEvolution.cpp | 132 +++++-- lib/Analysis/ValueTracking.cpp | 161 ++++++++ .../multidim_ivs_and_integer_offsets_3d.ll | 2 +- ...multidim_ivs_and_parameteric_offsets_3d.ll | 2 +- .../ScalarEvolution/flags-from-poison.ll | 358 ++++++++++++++++++ .../LoopStrengthReduce/sext-ind-var.ll | 36 ++ 8 files changed, 720 insertions(+), 28 deletions(-) create mode 100644 test/Analysis/ScalarEvolution/flags-from-poison.ll create mode 100644 test/Transforms/LoopStrengthReduce/sext-ind-var.ll diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 0f3f397d50b..c45fdb27191 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -566,6 +566,9 @@ namespace llvm { /// forgetMemoizedResults - Drop memoized information computed for S. void forgetMemoizedResults(const SCEV *S); + /// Return an existing SCEV for V if there is one, otherwise return nullptr. + const SCEV *getExistingSCEV(Value *V); + /// Return false iff given SCEV contains a SCEVUnknown with NULL value- /// pointer. bool checkValidity(const SCEV *S) const; @@ -595,6 +598,11 @@ namespace llvm { bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, bool &Increasing); + // Return SCEV no-wrap flags that can be proven based on reasoning + // about how poison produced from no-wrap flags on this value + // (e.g. a nuw add) would trigger undefined behavior on overflow. + SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V); + public: static char ID; // Pass identification, replacement for typeid ScalarEvolution(); diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index 653821d0227..b8c370dac9b 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -30,6 +30,7 @@ namespace llvm { class DominatorTree; class TargetLibraryInfo; class LoopInfo; + class Loop; /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. @@ -288,7 +289,53 @@ namespace llvm { AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT); - + + /// Return true if this function can prove that the instruction I will + /// always transfer execution to one of its successors (including the next + /// instruction that follows within a basic block). E.g. this is not + /// guaranteed for function calls that could loop infinitely. + /// + /// In other words, this function returns false for instructions that may + /// transfer execution or fail to transfer execution in a way that is not + /// captured in the CFG nor in the sequence of instructions within a basic + /// block. + /// + /// Undefined behavior is assumed not to happen, so e.g. division is + /// guaranteed to transfer execution to the following instruction even + /// though division by zero might cause undefined behavior. + bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I); + + /// Return true if this function can prove that the instruction I + /// is executed for every iteration of the loop L. + /// + /// Note that this currently only considers the loop header. + bool isGuaranteedToExecuteForEveryIteration(const Instruction *I, + const Loop *L); + + /// Return true if this function can prove that I is guaranteed to yield + /// full-poison (all bits poison) if at least one of its operands are + /// full-poison (all bits poison). + /// + /// The exact rules for how poison propagates through instructions have + /// not been settled as of 2015-07-10, so this function is conservative + /// and only considers poison to be propagated in uncontroversial + /// cases. There is no attempt to track values that may be only partially + /// poison. + bool propagatesFullPoison(const Instruction *I); + + /// Return either nullptr or an operand of I such that I will trigger + /// undefined behavior if I is executed and that operand has a full-poison + /// value (all bits poison). + const Value *getGuaranteedNonFullPoisonOp(const Instruction *I); + + /// Return true if this function can prove that if PoisonI is executed + /// and yields a full-poison value (all bits poison), then that will + /// trigger undefined behavior. + /// + /// Note that this currently only considers the basic block that is + /// the parent of I. + bool isKnownNotFullPoison(const Instruction *PoisonI); + /// \brief Specific patterns of select instructions we can match. enum SelectPatternFlavor { SPF_UNKNOWN = 0, diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 8e65c1a97f0..efeddf07db2 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -2936,7 +2936,8 @@ ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr, // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP // instruction to its SCEV, because the Instruction may be guarded by control // flow and the no-overflow bits may not be valid for the expression in any - // context. + // context. This can be fixed similarly to how these flags are handled for + // adds. SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap; const SCEV *TotalOffset = getConstant(IntPtrTy, 0); @@ -3315,22 +3316,25 @@ bool ScalarEvolution::checkValidity(const SCEV *S) const { const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); + const SCEV *S = getExistingSCEV(V); + if (S == nullptr) { + S = createSCEV(V); + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); + } + return S; +} + +const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { + assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); + ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; if (checkValidity(S)) return S; - else - ValueExprMap.erase(I); + ValueExprMap.erase(I); } - const SCEV *S = createSCEV(V); - - // The process of creating a SCEV for V may have caused other SCEVs - // to have been created, so it's necessary to insert the new entry - // from scratch, rather than trying to remember the insert position - // above. - ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); - return S; + return nullptr; } /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V @@ -4089,8 +4093,63 @@ ScalarEvolution::getRange(const SCEV *S, return setRange(S, SignHint, ConservativeResult); } -/// createSCEV - We know that there is no SCEV for the specified value. -/// Analyze the expression. +SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { + const BinaryOperator *BinOp = cast(V); + + // Return early if there are no flags to propagate to the SCEV. + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + if (BinOp->hasNoUnsignedWrap()) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); + if (BinOp->hasNoSignedWrap()) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); + if (Flags == SCEV::FlagAnyWrap) { + return SCEV::FlagAnyWrap; + } + + // Here we check that BinOp is in the header of the innermost loop + // containing BinOp, since we only deal with instructions in the loop + // header. The actual loop we need to check later will come from an add + // recurrence, but getting that requires computing the SCEV of the operands, + // which can be expensive. This check we can do cheaply to rule out some + // cases early. + Loop *innermostContainingLoop = LI->getLoopFor(BinOp->getParent()); + if (innermostContainingLoop == nullptr || + innermostContainingLoop->getHeader() != BinOp->getParent()) + return SCEV::FlagAnyWrap; + + // Only proceed if we can prove that BinOp does not yield poison. + if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap; + + // At this point we know that if V is executed, then it does not wrap + // according to at least one of NSW or NUW. If V is not executed, then we do + // not know if the calculation that V represents would wrap. Multiple + // instructions can map to the same SCEV. If we apply NSW or NUW from V to + // the SCEV, we must guarantee no wrapping for that SCEV also when it is + // derived from other instructions that map to the same SCEV. We cannot make + // that guarantee for cases where V is not executed. So we need to find the + // loop that V is considered in relation to and prove that V is executed for + // every iteration of that loop. That implies that the value that V + // calculates does not wrap anywhere in the loop, so then we can apply the + // flags to the SCEV. + // + // We check isLoopInvariant to disambiguate in case we are adding two + // recurrences from different loops, so that we know which loop to prove + // that V is executed in. + for (int OpIndex = 0; OpIndex < 2; ++OpIndex) { + const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex)); + if (auto *AddRec = dyn_cast(Op)) { + const int OtherOpIndex = 1 - OpIndex; + const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex)); + if (isLoopInvariant(OtherOp, AddRec->getLoop()) && + isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop())) + return Flags; + } + } + return SCEV::FlagAnyWrap; +} + +/// createSCEV - We know that there is no SCEV for the specified value. Analyze +/// the expression. /// const SCEV *ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) @@ -4127,29 +4186,52 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Instead, gather up all the operands and make a single getAddExpr call. // LLVM IR canonical form means we need only traverse the left operands. // - // Don't apply this instruction's NSW or NUW flags to the new - // expression. The instruction may be guarded by control flow that the - // no-wrap behavior depends on. Non-control-equivalent instructions can be - // mapped to the same SCEV expression, and it would be incorrect to transfer - // NSW/NUW semantics to those operations. + // FIXME: Expand this handling of NSW and NUW to other instructions, like + // sub and mul. SmallVector AddOps; - AddOps.push_back(getSCEV(U->getOperand(1))); - for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { - unsigned Opcode = Op->getValueID() - Value::InstructionVal; - if (Opcode != Instruction::Add && Opcode != Instruction::Sub) + for (Value *Op = U;; Op = U->getOperand(0)) { + U = dyn_cast(Op); + unsigned Opcode = U ? U->getOpcode() : 0; + if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) { + assert(Op != V && "V should be an add"); + AddOps.push_back(getSCEV(Op)); break; - U = cast(Op); + } + + if (auto *OpSCEV = getExistingSCEV(Op)) { + AddOps.push_back(OpSCEV); + break; + } + + // If a NUW or NSW flag can be applied to the SCEV for this + // addition, then compute the SCEV for this addition by itself + // with a separate call to getAddExpr. We need to do that + // instead of pushing the operands of the addition onto AddOps, + // since the flags are only known to apply to this particular + // addition - they may not apply to other additions that can be + // formed with operands from AddOps. + // + // FIXME: Expand this to sub instructions. + if (Opcode == Instruction::Add && isa(U)) { + SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U); + if (Flags != SCEV::FlagAnyWrap) { + AddOps.push_back(getAddExpr(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1)), Flags)); + break; + } + } + const SCEV *Op1 = getSCEV(U->getOperand(1)); if (Opcode == Instruction::Sub) AddOps.push_back(getNegativeSCEV(Op1)); else AddOps.push_back(Op1); } - AddOps.push_back(getSCEV(U->getOperand(0))); return getAddExpr(AddOps); } + case Instruction::Mul: { - // Don't transfer NSW/NUW for the same reason as AddExpr. + // FIXME: Transfer NSW/NUW as in AddExpr. SmallVector MulOps; MulOps.push_back(getSCEV(U->getOperand(1))); for (Value *Op = U->getOperand(0); diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index fa0d7798cae..434a69e0820 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -3316,6 +3316,167 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, return OverflowResult::MayOverflow; } +bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { + // FIXME: This conservative implementation can be relaxed. E.g. most + // atomic operations are guaranteed to terminate on most platforms + // and most functions terminate. + + return !I->isAtomic() && // atomics may never succeed on some platforms + !isa(I) && // could throw and might not terminate + !isa(I) && // might not terminate and could throw to + // non-successor (see bug 24185 for details). + !isa(I) && // has no successors + !isa(I); // has no successors +} + +bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, + const Loop *L) { + // The loop header is guaranteed to be executed for every iteration. + // + // FIXME: Relax this constraint to cover all basic blocks that are + // guaranteed to be executed at every iteration. + if (I->getParent() != L->getHeader()) return false; + + for (const Instruction &LI : *L->getHeader()) { + if (&LI == I) return true; + if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false; + } + llvm_unreachable("Instruction not contained in its own parent basic block."); +} + +bool llvm::propagatesFullPoison(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + case Instruction::Trunc: + case Instruction::BitCast: + case Instruction::AddrSpaceCast: + // These operations all propagate poison unconditionally. Note that poison + // is not any particular value, so xor or subtraction of poison with + // itself still yields poison, not zero. + return true; + + case Instruction::AShr: + case Instruction::SExt: + // For these operations, one bit of the input is replicated across + // multiple output bits. A replicated poison bit is still poison. + return true; + + case Instruction::Shl: { + // Left shift *by* a poison value is poison. The number of + // positions to shift is unsigned, so no negative values are + // possible there. Left shift by zero places preserves poison. So + // it only remains to consider left shift of poison by a positive + // number of places. + // + // A left shift by a positive number of places leaves the lowest order bit + // non-poisoned. However, if such a shift has a no-wrap flag, then we can + // make the poison operand violate that flag, yielding a fresh full-poison + // value. + auto *OBO = cast(I); + return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap(); + } + + case Instruction::Mul: { + // A multiplication by zero yields a non-poison zero result, so we need to + // rule out zero as an operand. Conservatively, multiplication by a + // non-zero constant is not multiplication by zero. + // + // Multiplication by a non-zero constant can leave some bits + // non-poisoned. For example, a multiplication by 2 leaves the lowest + // order bit unpoisoned. So we need to consider that. + // + // Multiplication by 1 preserves poison. If the multiplication has a + // no-wrap flag, then we can make the poison operand violate that flag + // when multiplied by any integer other than 0 and 1. + auto *OBO = cast(I); + if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) { + for (Value *V : OBO->operands()) { + if (auto *CI = dyn_cast(V)) { + // A ConstantInt cannot yield poison, so we can assume that it is + // the other operand that is poison. + return !CI->isZero(); + } + } + } + return false; + } + + case Instruction::GetElementPtr: + // A GEP implicitly represents a sequence of additions, subtractions, + // truncations, sign extensions and multiplications. The multiplications + // are by the non-zero sizes of some set of types, so we do not have to be + // concerned with multiplication by zero. If the GEP is in-bounds, then + // these operations are implicitly no-signed-wrap so poison is propagated + // by the arguments above for Add, Sub, Trunc, SExt and Mul. + return cast(I)->isInBounds(); + + default: + return false; + } +} + +const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Store: + return cast(I)->getPointerOperand(); + + case Instruction::Load: + return cast(I)->getPointerOperand(); + + case Instruction::AtomicCmpXchg: + return cast(I)->getPointerOperand(); + + case Instruction::AtomicRMW: + return cast(I)->getPointerOperand(); + + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + return I->getOperand(1); + + default: + return nullptr; + } +} + +bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) { + // We currently only look for uses of poison values within the same basic + // block, as that makes it easier to guarantee that the uses will be + // executed given that PoisonI is executed. + // + // FIXME: Expand this to consider uses beyond the same basic block. To do + // this, look out for the distinction between post-dominance and strong + // post-dominance. + const BasicBlock *BB = PoisonI->getParent(); + + // Set of instructions that we have proved will yield poison if PoisonI + // does. + SmallSet YieldsPoison; + YieldsPoison.insert(PoisonI); + + for (const Instruction *I = PoisonI, *E = BB->end(); I != E; + I = I->getNextNode()) { + if (I != PoisonI) { + const Value *NotPoison = getGuaranteedNonFullPoisonOp(I); + if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true; + if (!isGuaranteedToTransferExecutionToSuccessor(I)) return false; + } + + // Mark poison that propagates from I through uses of I. + if (YieldsPoison.count(I)) { + for (const User *User : I->users()) { + const Instruction *UserI = cast(User); + if (UserI->getParent() == BB && propagatesFullPoison(UserI)) + YieldsPoison.insert(User); + } + } + } + return false; +} + static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, Value *TrueVal, Value *FalseVal, diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll index 317e62c8ef9..4ae976281c4 100644 --- a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll +++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll @@ -11,7 +11,7 @@ ; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. -; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>] +; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>] define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { entry: diff --git a/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll index 9e37b76e59b..893c542c06a 100644 --- a/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll +++ b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll @@ -11,7 +11,7 @@ ; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. -; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>] +; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>] define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) { entry: diff --git a/test/Analysis/ScalarEvolution/flags-from-poison.ll b/test/Analysis/ScalarEvolution/flags-from-poison.ll new file mode 100644 index 00000000000..3fcb0e5bd23 --- /dev/null +++ b/test/Analysis/ScalarEvolution/flags-from-poison.ll @@ -0,0 +1,358 @@ +; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s + +; Positive and negative tests for inferring flags like nsw from +; reasoning about how a poison value from overflow would trigger +; undefined behavior. + +define void @foo() { + ret void +} + +; Example where an add should get the nsw flag, so that a sext can be +; distributed over the add. +define void @test-add-nsw(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-nsw +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + %index32 = add nsw i32 %i, %offset + +; CHECK: %index64 = +; CHECK: --> {(sext i32 %offset to i64),+,1} + %index64 = sext i32 %index32 to i64 + + %ptr = getelementptr inbounds float, float* %input, i64 %index64 + %nexti = add nsw i32 %i, 1 + %f = load float, float* %ptr, align 4 + call void @foo() + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; Example where an add should get the nuw flag. +define void @test-add-nuw(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-nuw +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + %index32 = add nuw i32 %i, %offset + + %ptr = getelementptr inbounds float, float* %input, i32 %index32 + %nexti = add nuw i32 %i, 1 + %f = load float, float* %ptr, align 4 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; With no load to trigger UB from poison, we cannot infer nsw. +define void @test-add-no-load(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-no-load +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + %index32 = add nsw i32 %i, %offset + + %ptr = getelementptr inbounds float, float* %input, i32 %index32 + %nexti = add nuw i32 %i, 1 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +; The current code is only supposed to look at the loop header, so +; it should not infer nsw in this case, as that would require looking +; outside the loop header. +define void @test-add-not-header(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-not-header +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ] + br label %loop2 +loop2: + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + %index32 = add nsw i32 %i, %offset + + %ptr = getelementptr inbounds float, float* %input, i32 %index32 + %nexti = add nsw i32 %i, 1 + %f = load float, float* %ptr, align 4 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; Same thing as test-add-not-header, but in this case only the load +; instruction is outside the loop header. +define void @test-add-not-header2(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-not-header2 +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ] + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + %index32 = add nsw i32 %i, %offset + + %ptr = getelementptr inbounds float, float* %input, i32 %index32 + %nexti = add nsw i32 %i, 1 + br label %loop2 +loop2: + %f = load float, float* %ptr, align 4 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; The call instruction makes it not guaranteed that the add will be +; executed, since it could run forever or throw an exception, so we +; cannot assume that the UB is realized. +define void @test-add-call(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-call +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + call void @foo() + %index32 = add nsw i32 %i, %offset + + %ptr = getelementptr inbounds float, float* %input, i32 %index32 + %nexti = add nsw i32 %i, 1 + %f = load float, float* %ptr, align 4 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; Same issue as test-add-call, but this time the call is between the +; producer of poison and the load that consumes it. +define void @test-add-call2(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-call2 +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + %index32 = add nsw i32 %i, %offset + + %ptr = getelementptr inbounds float, float* %input, i32 %index32 + %nexti = add nsw i32 %i, 1 + call void @foo() + %f = load float, float* %ptr, align 4 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; Without inbounds, GEP does not propagate poison in the very +; conservative approach used here. +define void @test-add-no-inbounds(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-no-inbounds +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + %index32 = add nsw i32 %i, %offset + + %ptr = getelementptr float, float* %input, i32 %index32 + %nexti = add nsw i32 %i, 1 + %f = load float, float* %ptr, align 4 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; Multiplication by a non-zero constant propagates poison if there is +; a nuw or nsw flag on the multiplication. +define void @test-add-mul-propagates(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-mul-propagates +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + %index32 = add nsw i32 %i, %offset + + %indexmul = mul nuw i32 %index32, 2 + %ptr = getelementptr inbounds float, float* %input, i32 %indexmul + %nexti = add nsw i32 %i, 1 + %f = load float, float* %ptr, align 4 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; Multiplication by a non-constant should not propagate poison in the +; very conservative approach used here. +define void @test-add-mul-no-propagation(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-mul-no-propagation +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + %index32 = add nsw i32 %i, %offset + + %indexmul = mul nsw i32 %index32, %offset + %ptr = getelementptr inbounds float, float* %input, i32 %indexmul + %nexti = add nsw i32 %i, 1 + %f = load float, float* %ptr, align 4 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; Multiplication by a non-zero constant does not propagate poison +; without a no-wrap flag. +define void @test-add-mul-no-propagation2(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-mul-no-propagation2 +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + %index32 = add nsw i32 %i, %offset + + %indexmul = mul i32 %index32, 2 + %ptr = getelementptr inbounds float, float* %input, i32 %indexmul + %nexti = add nsw i32 %i, 1 + %f = load float, float* %ptr, align 4 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; Division by poison triggers UB. +define void @test-add-div(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-div +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %j = +; CHECK: --> {%offset,+,1} + %j = add nsw i32 %i, %offset + + %q = sdiv i32 %numIterations, %j + %nexti = add nsw i32 %i, 1 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; Remainder of poison by non-poison divisor does not trigger UB. +define void @test-add-div2(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-div2 +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %j = +; CHECK: --> {%offset,+,1} + %j = add nsw i32 %i, %offset + + %q = sdiv i32 %j, %numIterations + %nexti = add nsw i32 %i, 1 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; Store to poison address triggers UB. +define void @test-add-store(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-store +entry: + br label %loop +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + +; CHECK: %index32 = +; CHECK: --> {%offset,+,1} + %index32 = add nsw i32 %i, %offset + + %ptr = getelementptr inbounds float, float* %input, i32 %index32 + %nexti = add nsw i32 %i, 1 + store float 1.0, float* %ptr, align 4 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop +exit: + ret void +} + +; Three sequential adds where the middle add should have nsw. There is +; a special case for sequential adds and this test covers that. We have to +; put the final add first in the program since otherwise the special case +; is not triggered, hence the strange basic block ordering. +define void @test-add-twice(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @test-add-twice +entry: + br label %loop +loop2: +; CHECK: %seq = +; CHECK: --> {(2 + %offset),+,1} + %seq = add nsw nuw i32 %index32, 1 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop + +loop: + %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ] + + %j = add nsw i32 %i, 1 +; CHECK: %index32 = +; CHECK: --> {(1 + %offset),+,1} + %index32 = add nsw i32 %j, %offset + + %ptr = getelementptr inbounds float, float* %input, i32 %index32 + %nexti = add nsw i32 %i, 1 + store float 1.0, float* %ptr, align 4 + br label %loop2 +exit: + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/sext-ind-var.ll b/test/Transforms/LoopStrengthReduce/sext-ind-var.ll new file mode 100644 index 00000000000..9fca3f97094 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/sext-ind-var.ll @@ -0,0 +1,36 @@ +; RUN: opt -loop-reduce -S < %s | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-unknown-unknown" + +; LSR used not to be able to generate a float* induction variable in +; these cases due to scalar evolution not propagating nsw from an +; instruction to the SCEV, preventing distributing sext into the +; corresponding addrec. + +define float @testadd(float* %input, i32 %offset, i32 %numIterations) { +; CHECK-LABEL: @testadd +; CHECK: sext i32 %offset to i64 +; CHECK: loop: +; CHECK-DAG: phi float* +; CHECK-DAG: phi i32 +; CHECK-NOT: sext + +entry: + br label %loop + +loop: + %i = phi i32 [ %nexti, %loop ], [ 0, %entry ] + %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ] + %index32 = add nuw nsw i32 %i, %offset + %index64 = sext i32 %index32 to i64 + %ptr = getelementptr inbounds float, float* %input, i64 %index64 + %addend = load float, float* %ptr, align 4 + %nextsum = fadd float %sum, %addend + %nexti = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %nexti, %numIterations + br i1 %exitcond, label %exit, label %loop + +exit: + ret float %nextsum +} -- 2.34.1