/// forgetMemoizedResults - Drop memoized information computed for S.
void forgetMemoizedResults(const SCEV *S);
+ /// Return an existing SCEV for V if there is one, otherwise return nullptr.
+ const SCEV *getExistingSCEV(Value *V);
+
/// Return false iff given SCEV contains a SCEVUnknown with NULL value-
/// pointer.
bool checkValidity(const SCEV *S) const;
bool isMonotonicPredicate(const SCEVAddRecExpr *LHS,
ICmpInst::Predicate Pred, bool &Increasing);
+ /// Return SCEV no-wrap flags that can be proven based on reasoning about
+ /// how poison produced from no-wrap flags on this value (e.g. a nuw add)
+ /// would trigger undefined behavior on overflow.
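+ ///
+ /// For example, if an add nsw executes on every loop iteration and its
+ /// result feeds an instruction that triggers UB on poison (such as a
+ /// load through a derived pointer), then the add cannot wrap: wrapping
+ /// would yield poison and hence UB, which is assumed absent. FlagNSW
+ /// can then be applied to its SCEV.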
+ SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V);
+
public:
static char ID; // Pass identification, replacement for typeid
ScalarEvolution();
class DominatorTree;
class TargetLibraryInfo;
class LoopInfo;
+ class Loop;
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT);
-
+
+ /// Return true if this function can prove that the instruction I will
+ /// always transfer execution to one of its successors (including the next
+ /// instruction that follows within a basic block). E.g. this is not
+ /// guaranteed for function calls that could loop infinitely.
+ ///
+ /// In other words, this function returns false for instructions that may
+ /// transfer execution or fail to transfer execution in a way that is not
+ /// captured in the CFG or in the sequence of instructions within a basic
+ /// block.
+ ///
+ /// Undefined behavior is assumed not to happen, so e.g. division is
+ /// guaranteed to transfer execution to the following instruction even
+ /// though division by zero might cause undefined behavior.
+ bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I);
+
+ /// Return true if this function can prove that the instruction I
+ /// is executed for every iteration of the loop L.
+ ///
+ /// Note that this currently only considers the loop header.
+ bool isGuaranteedToExecuteForEveryIteration(const Instruction *I,
+ const Loop *L);
+
+ /// Return true if this function can prove that I is guaranteed to yield
+ /// full-poison (all bits poison) if at least one of its operands is
+ /// full-poison (all bits poison).
+ ///
+ /// The exact rules for how poison propagates through instructions have
+ /// not been settled as of 2015-07-10, so this function is conservative
+ /// and only considers poison to be propagated in uncontroversial
+ /// cases. There is no attempt to track values that may be only partially
+ /// poison.
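+ ///
+ /// For example, if %a is full-poison then %b = xor i32 %a, %a is also
+ /// full-poison (poison is not any particular value, so xor of poison
+ /// with itself is not zero), whereas %c = mul i32 %a, 0 is zero and
+ /// hence not poison.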
+ bool propagatesFullPoison(const Instruction *I);
+
+ /// Return either nullptr or an operand of I such that I will trigger
+ /// undefined behavior if I is executed and that operand has a full-poison
+ /// value (all bits poison).
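+ ///
+ /// For example, for a load or store this returns the pointer operand,
+ /// since accessing memory through a full-poison pointer is undefined
+ /// behavior.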
+ const Value *getGuaranteedNonFullPoisonOp(const Instruction *I);
+
+ /// Return true if this function can prove that if PoisonI is executed
+ /// and yields a full-poison value (all bits poison), then that will
+ /// trigger undefined behavior.
+ ///
+ /// Note that this currently only considers the basic block that is
+ /// the parent of PoisonI.
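+ ///
+ /// For example, if PoisonI feeds the pointer operand of a store later in
+ /// the same basic block, and every instruction in between is guaranteed
+ /// to transfer execution to its successor, then PoisonI cannot be
+ /// full-poison.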
+ bool isKnownNotFullPoison(const Instruction *PoisonI);
+
/// \brief Specific patterns of select instructions we can match.
enum SelectPatternFlavor {
SPF_UNKNOWN = 0,
// FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
// instruction to its SCEV, because the Instruction may be guarded by control
// flow and the no-overflow bits may not be valid for the expression in any
- // context.
+ // context. This can be fixed similarly to how these flags are handled for
+ // adds.
SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+ const SCEV *S = getExistingSCEV(V);
+ if (S == nullptr) {
+ S = createSCEV(V);
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+ }
+ return S;
+}
+
+const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
+ assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
if (checkValidity(S))
return S;
- else
- ValueExprMap.erase(I);
+ ValueExprMap.erase(I);
}
- const SCEV *S = createSCEV(V);
-
- // The process of creating a SCEV for V may have caused other SCEVs
- // to have been created, so it's necessary to insert the new entry
- // from scratch, rather than trying to remember the insert position
- // above.
- ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
- return S;
+ return nullptr;
}
/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
return setRange(S, SignHint, ConservativeResult);
}
-/// createSCEV - We know that there is no SCEV for the specified value.
-/// Analyze the expression.
+SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
+ const BinaryOperator *BinOp = cast<BinaryOperator>(V);
+
+ // Return early if there are no flags to propagate to the SCEV.
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+ if (BinOp->hasNoUnsignedWrap())
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+ if (BinOp->hasNoSignedWrap())
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
+  if (Flags == SCEV::FlagAnyWrap)
+    return SCEV::FlagAnyWrap;
+
+ // Here we check that BinOp is in the header of the innermost loop
+ // containing BinOp, since we only deal with instructions in the loop
+ // header. The actual loop we need to check later will come from an add
+ // recurrence, but getting that requires computing the SCEV of the operands,
+  // which can be expensive. This check is cheap and lets us rule out some
+  // cases early.
+  Loop *InnermostContainingLoop = LI->getLoopFor(BinOp->getParent());
+  if (InnermostContainingLoop == nullptr ||
+      InnermostContainingLoop->getHeader() != BinOp->getParent())
+ return SCEV::FlagAnyWrap;
+
+ // Only proceed if we can prove that BinOp does not yield poison.
+ if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap;
+
+ // At this point we know that if V is executed, then it does not wrap
+ // according to at least one of NSW or NUW. If V is not executed, then we do
+ // not know if the calculation that V represents would wrap. Multiple
+ // instructions can map to the same SCEV. If we apply NSW or NUW from V to
+ // the SCEV, we must guarantee no wrapping for that SCEV also when it is
+ // derived from other instructions that map to the same SCEV. We cannot make
+ // that guarantee for cases where V is not executed. So we need to find the
+ // loop that V is considered in relation to and prove that V is executed for
+ // every iteration of that loop. That implies that the value that V
+ // calculates does not wrap anywhere in the loop, so then we can apply the
+ // flags to the SCEV.
+ //
+ // We check isLoopInvariant to disambiguate in case we are adding two
+ // recurrences from different loops, so that we know which loop to prove
+ // that V is executed in.
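+  //
+  // For example (a hypothetical sketch):
+  //
+  //   loop:
+  //     %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+  //     %v = add nsw i32 %i, %offset
+  //     ...
+  //
+  // Here getSCEV(%i) is the add recurrence {0,+,1}<%loop> and %offset is
+  // invariant in %loop, so %loop is the loop for which we must prove that
+  // %v executes on every iteration.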
+ for (int OpIndex = 0; OpIndex < 2; ++OpIndex) {
+ const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex));
+ if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
+ const int OtherOpIndex = 1 - OpIndex;
+ const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex));
+ if (isLoopInvariant(OtherOp, AddRec->getLoop()) &&
+ isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop()))
+ return Flags;
+ }
+ }
+ return SCEV::FlagAnyWrap;
+}
+
+/// createSCEV - We know that there is no SCEV for the specified value. Analyze
+/// the expression.
///
const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (!isSCEVable(V->getType()))
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
//
- // Don't apply this instruction's NSW or NUW flags to the new
- // expression. The instruction may be guarded by control flow that the
- // no-wrap behavior depends on. Non-control-equivalent instructions can be
- // mapped to the same SCEV expression, and it would be incorrect to transfer
- // NSW/NUW semantics to those operations.
+ // FIXME: Expand this handling of NSW and NUW to other instructions, like
+ // sub and mul.
SmallVector<const SCEV *, 4> AddOps;
- AddOps.push_back(getSCEV(U->getOperand(1)));
- for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
- unsigned Opcode = Op->getValueID() - Value::InstructionVal;
- if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+ for (Value *Op = U;; Op = U->getOperand(0)) {
+ U = dyn_cast<Operator>(Op);
+ unsigned Opcode = U ? U->getOpcode() : 0;
+ if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) {
+ assert(Op != V && "V should be an add");
+ AddOps.push_back(getSCEV(Op));
break;
- U = cast<Operator>(Op);
+ }
+
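+    // If this operand already has a memoized SCEV, use it directly; there
+    // is then no need to keep decomposing the chain of adds below it.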
+ if (auto *OpSCEV = getExistingSCEV(Op)) {
+ AddOps.push_back(OpSCEV);
+ break;
+ }
+
+ // If a NUW or NSW flag can be applied to the SCEV for this
+ // addition, then compute the SCEV for this addition by itself
+ // with a separate call to getAddExpr. We need to do that
+ // instead of pushing the operands of the addition onto AddOps,
+ // since the flags are only known to apply to this particular
+ // addition - they may not apply to other additions that can be
+ // formed with operands from AddOps.
+ //
+ // FIXME: Expand this to sub instructions.
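+      //
+      // For instance (a hypothetical sketch, given LLVM's left-leaning
+      // canonical form):
+      //   %t = add nsw i32 %b, %c
+      //   %r = add i32 %t, %a
+      // If the nsw on %t can be transferred to its SCEV, the flag applies
+      // only to (%b + %c), so that addition becomes a SCEV by itself
+      // rather than contributing %b and %c to AddOps.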
+ if (Opcode == Instruction::Add && isa<BinaryOperator>(U)) {
+ SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+ if (Flags != SCEV::FlagAnyWrap) {
+ AddOps.push_back(getAddExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)), Flags));
+ break;
+ }
+ }
+
const SCEV *Op1 = getSCEV(U->getOperand(1));
if (Opcode == Instruction::Sub)
AddOps.push_back(getNegativeSCEV(Op1));
else
AddOps.push_back(Op1);
}
- AddOps.push_back(getSCEV(U->getOperand(0)));
return getAddExpr(AddOps);
}
+
case Instruction::Mul: {
- // Don't transfer NSW/NUW for the same reason as AddExpr.
+ // FIXME: Transfer NSW/NUW as in AddExpr.
SmallVector<const SCEV *, 4> MulOps;
MulOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0);
return OverflowResult::MayOverflow;
}
+bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
+ // FIXME: This conservative implementation can be relaxed. E.g. most
+ // atomic operations are guaranteed to terminate on most platforms
+ // and most functions terminate.
+
+ return !I->isAtomic() && // atomics may never succeed on some platforms
+ !isa<CallInst>(I) && // could throw and might not terminate
+ !isa<InvokeInst>(I) && // might not terminate and could throw to
+ // non-successor (see bug 24185 for details).
+ !isa<ResumeInst>(I) && // has no successors
+ !isa<ReturnInst>(I); // has no successors
+}
+
+bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
+ const Loop *L) {
+ // The loop header is guaranteed to be executed for every iteration.
+ //
+ // FIXME: Relax this constraint to cover all basic blocks that are
+ // guaranteed to be executed at every iteration.
+ if (I->getParent() != L->getHeader()) return false;
+
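+  // Walk the header from the top down to I. I executes on every iteration
+  // iff every instruction before it is guaranteed to transfer execution to
+  // its successor (e.g. no earlier call might loop forever or throw).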
+ for (const Instruction &LI : *L->getHeader()) {
+ if (&LI == I) return true;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
+ }
+ llvm_unreachable("Instruction not contained in its own parent basic block.");
+}
+
+bool llvm::propagatesFullPoison(const Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ // These operations all propagate poison unconditionally. Note that poison
+ // is not any particular value, so xor or subtraction of poison with
+ // itself still yields poison, not zero.
+ return true;
+
+ case Instruction::AShr:
+ case Instruction::SExt:
+ // For these operations, one bit of the input is replicated across
+ // multiple output bits. A replicated poison bit is still poison.
+ return true;
+
+ case Instruction::Shl: {
+ // Left shift *by* a poison value is poison. The number of
+ // positions to shift is unsigned, so no negative values are
+ // possible there. Left shift by zero places preserves poison. So
+ // it only remains to consider left shift of poison by a positive
+ // number of places.
+ //
+ // A left shift by a positive number of places leaves the lowest order bit
+ // non-poisoned. However, if such a shift has a no-wrap flag, then we can
+ // make the poison operand violate that flag, yielding a fresh full-poison
+ // value.
+ auto *OBO = cast<OverflowingBinaryOperator>(I);
+ return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
+ }
+
+ case Instruction::Mul: {
+ // A multiplication by zero yields a non-poison zero result, so we need to
+ // rule out zero as an operand. Conservatively, multiplication by a
+ // non-zero constant is not multiplication by zero.
+ //
+ // Multiplication by a non-zero constant can leave some bits
+ // non-poisoned. For example, a multiplication by 2 leaves the lowest
+ // order bit unpoisoned. So we need to consider that.
+ //
+ // Multiplication by 1 preserves poison. If the multiplication has a
+ // no-wrap flag, then we can make the poison operand violate that flag
+ // when multiplied by any integer other than 0 and 1.
+ auto *OBO = cast<OverflowingBinaryOperator>(I);
+ if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) {
+ for (Value *V : OBO->operands()) {
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ // A ConstantInt cannot yield poison, so we can assume that it is
+ // the other operand that is poison.
+ return !CI->isZero();
+ }
+ }
+ }
+ return false;
+ }
+
+ case Instruction::GetElementPtr:
+ // A GEP implicitly represents a sequence of additions, subtractions,
+ // truncations, sign extensions and multiplications. The multiplications
+ // are by the non-zero sizes of some set of types, so we do not have to be
+ // concerned with multiplication by zero. If the GEP is in-bounds, then
+ // these operations are implicitly no-signed-wrap so poison is propagated
+ // by the arguments above for Add, Sub, Trunc, SExt and Mul.
+ return cast<GEPOperator>(I)->isInBounds();
+
+ default:
+ return false;
+ }
+}
+
+const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
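+  // Each case returns the operand whose full-poison value would make
+  // executing I undefined behavior: memory instructions must not access a
+  // poison pointer, and division/remainder must not use a poison divisor.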
+ switch (I->getOpcode()) {
+ case Instruction::Store:
+ return cast<StoreInst>(I)->getPointerOperand();
+
+ case Instruction::Load:
+ return cast<LoadInst>(I)->getPointerOperand();
+
+ case Instruction::AtomicCmpXchg:
+ return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
+
+ case Instruction::AtomicRMW:
+ return cast<AtomicRMWInst>(I)->getPointerOperand();
+
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ return I->getOperand(1);
+
+ default:
+ return nullptr;
+ }
+}
+
+bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+ // We currently only look for uses of poison values within the same basic
+ // block, as that makes it easier to guarantee that the uses will be
+ // executed given that PoisonI is executed.
+ //
+ // FIXME: Expand this to consider uses beyond the same basic block. To do
+ // this, look out for the distinction between post-dominance and strong
+ // post-dominance.
+ const BasicBlock *BB = PoisonI->getParent();
+
+ // Set of instructions that we have proved will yield poison if PoisonI
+ // does.
+ SmallSet<const Value *, 16> YieldsPoison;
+ YieldsPoison.insert(PoisonI);
+
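+  // Scan forward from PoisonI. If we reach an instruction that is
+  // guaranteed to execute and has an operand that (a) must not be
+  // full-poison and (b) we have proved would be full-poison, then the
+  // assumed absence of UB proves PoisonI does not yield full-poison.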
+ for (const Instruction *I = PoisonI, *E = BB->end(); I != E;
+ I = I->getNextNode()) {
+ if (I != PoisonI) {
+ const Value *NotPoison = getGuaranteedNonFullPoisonOp(I);
+ if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
+ if (!isGuaranteedToTransferExecutionToSuccessor(I)) return false;
+ }
+
+ // Mark poison that propagates from I through uses of I.
+ if (YieldsPoison.count(I)) {
+ for (const User *User : I->users()) {
+ const Instruction *UserI = cast<Instruction>(User);
+ if (UserI->getParent() == BB && propagatesFullPoison(UserI))
+ YieldsPoison.insert(User);
+ }
+ }
+ }
+ return false;
+}
+
static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
Value *CmpLHS, Value *CmpRHS,
Value *TrueVal, Value *FalseVal,
; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
; CHECK: Base offset: %A
; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
-; CHECK: ArrayRef[{3,+,1}<nw><%for.i>][{-4,+,1}<nw><%for.j>][{7,+,1}<nw><%for.k>]
+; CHECK: ArrayRef[{3,+,1}<nw><%for.i>][{-4,+,1}<nw><%for.j>][{7,+,1}<nuw><nsw><%for.k>]
define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
entry:
; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
; CHECK: Base offset: %A
; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
-; CHECK: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nw><%for.j>][{%r,+,1}<nw><%for.k>]
+; CHECK: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nw><%for.j>][{%r,+,1}<nsw><%for.k>]
define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) {
entry:
--- /dev/null
+; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s
+
+; Positive and negative tests for inferring flags like nsw from
+; reasoning about how a poison value from overflow would trigger
+; undefined behavior.
+
+define void @foo() {
+ ret void
+}
+
+; Example where an add should get the nsw flag, so that a sext can be
+; distributed over the add.
+define void @test-add-nsw(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-nsw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nsw>
+ %index32 = add nsw i32 %i, %offset
+
+; CHECK: %index64 =
+; CHECK: --> {(sext i32 %offset to i64),+,1}<nsw>
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ call void @foo()
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where an add should get the nuw flag.
+define void @test-add-nuw(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-nuw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nuw>
+ %index32 = add nuw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nuw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; With no load to trigger UB from poison, we cannot infer nsw.
+define void @test-add-no-load(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-no-load
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nuw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; The current code is only supposed to look at the loop header, so
+; it should not infer nsw in this case, as that would require looking
+; outside the loop header.
+define void @test-add-not-header(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-not-header
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+ br label %loop2
+loop2:
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Same thing as test-add-not-header, but in this case only the load
+; instruction is outside the loop header.
+define void @test-add-not-header2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-not-header2
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ br label %loop2
+loop2:
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; The call instruction makes it not guaranteed that the add will be
+; executed, since it could run forever or throw an exception, so we
+; cannot assume that the UB is realized.
+define void @test-add-call(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-call
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ call void @foo()
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Same issue as test-add-call, but this time the call is between the
+; producer of poison and the load that consumes it.
+define void @test-add-call2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-call2
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ call void @foo()
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Without inbounds, GEP does not propagate poison in the very
+; conservative approach used here.
+define void @test-add-no-inbounds(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-no-inbounds
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Multiplication by a non-zero constant propagates poison if there is
+; a nuw or nsw flag on the multiplication.
+define void @test-add-mul-propagates(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-mul-propagates
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nsw>
+ %index32 = add nsw i32 %i, %offset
+
+ %indexmul = mul nuw i32 %index32, 2
+ %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Multiplication by a non-constant should not propagate poison in the
+; very conservative approach used here.
+define void @test-add-mul-no-propagation(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-mul-no-propagation
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %indexmul = mul nsw i32 %index32, %offset
+ %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Multiplication by a non-zero constant does not propagate poison
+; without a no-wrap flag.
+define void @test-add-mul-no-propagation2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-mul-no-propagation2
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %indexmul = mul i32 %index32, 2
+ %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Division by poison triggers UB.
+define void @test-add-div(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-div
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %j =
+; CHECK: --> {%offset,+,1}<nsw>
+ %j = add nsw i32 %i, %offset
+
+ %q = sdiv i32 %numIterations, %j
+ %nexti = add nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Division of poison by a non-poison divisor does not trigger UB.
+define void @test-add-div2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-div2
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %j =
+; CHECK: --> {%offset,+,1}<nw>
+ %j = add nsw i32 %i, %offset
+
+ %q = sdiv i32 %j, %numIterations
+ %nexti = add nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Store to poison address triggers UB.
+define void @test-add-store(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-store
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nsw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ store float 1.0, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Three sequential adds where the middle add should have nsw. There is
+; a special case for sequential adds and this test covers that. We have to
+; put the final add first in the program since otherwise the special case
+; is not triggered, hence the strange basic block ordering.
+define void @test-add-twice(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-twice
+entry:
+ br label %loop
+loop2:
+; CHECK: %seq =
+; CHECK: --> {(2 + %offset),+,1}<nw>
+ %seq = add nsw nuw i32 %index32, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+
+ %j = add nsw i32 %i, 1
+; CHECK: %index32 =
+; CHECK: --> {(1 + %offset),+,1}<nsw>
+ %index32 = add nsw i32 %j, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ store float 1.0, float* %ptr, align 4
+ br label %loop2
+exit:
+ ret void
+}
--- /dev/null
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; LSR was previously unable to generate a float* induction variable in
+; these cases because scalar evolution did not propagate nsw from an
+; instruction to its SCEV, which prevented distributing the sext into
+; the corresponding addrec.
+
+define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @testadd
+; CHECK: sext i32 %offset to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = add nuw nsw i32 %i, %offset
+ %index64 = sext i32 %index32 to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}