/// getNegativeSCEV - Return the SCEV object corresponding to -V.
///
- const SCEV *getNegativeSCEV(const SCEV *V);
+ const SCEV *getNegativeSCEV(const SCEV *V,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
/// getNotSCEV - Return the SCEV object corresponding to ~V.
///
/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
///
-const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
+ SCEV::NoWrapFlags Flags) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
- return getMulExpr(V,
- getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+ return getMulExpr(
+ V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
}
/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags) {
- assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
-
// Fast path: X - X --> 0.
if (LHS == RHS)
return getConstant(LHS->getType(), 0);
- // X - Y --> X + -Y.
- // X -(nsw || nuw) Y --> X + -Y.
- return getAddExpr(LHS, getNegativeSCEV(RHS));
+ // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
+ // makes it so that we cannot make much use of NUW.
+ auto AddFlags = SCEV::FlagAnyWrap;
+ const bool RHSIsNotMinSigned =
+ !getSignedRange(RHS).getSignedMin().isMinSignedValue();
+ if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
+ // Let M be the minimum representable signed value. Then (-1)*RHS
+ // signed-wraps if and only if RHS is M. That can happen even for
+ // an NSW subtraction because e.g. (-1)*M signed-wraps even though
+ // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
+ // (-1)*RHS, we need to prove that RHS != M.
+ //
+ // If LHS is non-negative and we know that LHS - RHS does not
+ // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
+ // either by proving that RHS > M or that LHS >= 0.
+ if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
+ AddFlags = SCEV::FlagNSW;
+ }
+ }
+
+ // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
+ // RHS is NSW and LHS >= 0.
+ //
+ // The difficulty here is that the NSW flag may have been proven
+ // relative to a loop that is to be found in a recurrence in LHS and
+ // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
+ // larger scope than intended.
+ auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+
+ return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
}
/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
}
SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
+ if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
const BinaryOperator *BinOp = cast<BinaryOperator>(V);
// Return early if there are no flags to propagate to the SCEV.
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
- //
- // FIXME: Expand this handling of NSW and NUW to other instructions, like
- // sub and mul.
SmallVector<const SCEV *, 4> AddOps;
for (Value *Op = U;; Op = U->getOperand(0)) {
U = dyn_cast<Operator>(Op);
break;
}
- if (auto *OpSCEV = getExistingSCEV(Op)) {
+ if (auto *OpSCEV = getExistingSCEV(U)) {
AddOps.push_back(OpSCEV);
break;
}
// since the flags are only known to apply to this particular
// addition - they may not apply to other additions that can be
// formed with operands from AddOps.
- //
- // FIXME: Expand this to sub instructions.
- if (Opcode == Instruction::Add && isa<BinaryOperator>(U)) {
- SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
- if (Flags != SCEV::FlagAnyWrap) {
- AddOps.push_back(getAddExpr(getSCEV(U->getOperand(0)),
- getSCEV(U->getOperand(1)), Flags));
- break;
- }
+ const SCEV *RHS = getSCEV(U->getOperand(1));
+ SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+ if (Flags != SCEV::FlagAnyWrap) {
+ const SCEV *LHS = getSCEV(U->getOperand(0));
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
+ else
+ AddOps.push_back(getAddExpr(LHS, RHS, Flags));
+ break;
}
- const SCEV *Op1 = getSCEV(U->getOperand(1));
if (Opcode == Instruction::Sub)
- AddOps.push_back(getNegativeSCEV(Op1));
+ AddOps.push_back(getNegativeSCEV(RHS));
else
- AddOps.push_back(Op1);
+ AddOps.push_back(RHS);
}
return getAddExpr(AddOps);
}
case Instruction::Mul: {
- // FIXME: Transfer NSW/NUW as in AddExpr.
SmallVector<const SCEV *, 4> MulOps;
- MulOps.push_back(getSCEV(U->getOperand(1)));
- for (Value *Op = U->getOperand(0);
- Op->getValueID() == Instruction::Mul + Value::InstructionVal;
- Op = U->getOperand(0)) {
- U = cast<Operator>(Op);
+ for (Value *Op = U;; Op = U->getOperand(0)) {
+ U = dyn_cast<Operator>(Op);
+ if (!U || U->getOpcode() != Instruction::Mul) {
+ assert(Op != V && "V should be a mul");
+ MulOps.push_back(getSCEV(Op));
+ break;
+ }
+
+ if (auto *OpSCEV = getExistingSCEV(U)) {
+ MulOps.push_back(OpSCEV);
+ break;
+ }
+
+ SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+ if (Flags != SCEV::FlagAnyWrap) {
+ MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)), Flags));
+ break;
+ }
+
MulOps.push_back(getSCEV(U->getOperand(1)));
}
- MulOps.push_back(getSCEV(U->getOperand(0)));
return getMulExpr(MulOps);
}
case Instruction::UDiv:
return getUDivExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::Sub:
- return getMinusSCEV(getSCEV(U->getOperand(0)),
- getSCEV(U->getOperand(1)));
+ return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)),
+ getNoWrapFlagsFromUB(U));
case Instruction::And:
// For an expression like x&255 that merely masks off the high bits,
// use zext(trunc(x)) as the SCEV expression.
if (SA->getValue().uge(BitWidth))
break;
+ // It is currently not resolved how to interpret NSW for left
+ // shift by BitWidth - 1, so we avoid applying flags in that
+ // case. Remove this check (or this comment) once the situation
+ // is resolved. See
+ // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
+ // and http://reviews.llvm.org/D8890 .
+ auto Flags = SCEV::FlagAnyWrap;
+ if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U);
+
Constant *X = ConstantInt::get(getContext(),
APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
- return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags);
}
break;
; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k>
; CHECK: Base offset: %A
; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes.
-; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>]
+; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<nw><%for.k>]
define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 {
entry:
exit:
ret void
}
+
+; Example where a mul should get the nsw flag, so that a sext can be
+; distributed over the mul.
+define void @test-mul-nsw(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @test-mul-nsw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,%stride}<nsw>
+ %index32 = mul nsw i32 %i, %stride
+
+; CHECK: %index64 =
+; CHECK: --> {0,+,(sext i32 %stride to i64)}<nsw>
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a mul should get the nuw flag.
+define void @test-mul-nuw(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @test-mul-nuw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,%stride}<nuw>
+ %index32 = mul nuw i32 %i, %stride
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nuw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Example where a shl should get the nsw flag, so that a sext can be
+; distributed over the shl.
+define void @test-shl-nsw(float* %input, i32 %start, i32 %numIterations) {
+; CHECK-LABEL: @test-shl-nsw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {(256 * %start),+,256}<nsw>
+ %index32 = shl nsw i32 %i, 8
+
+; CHECK: %index64 =
+; CHECK: --> {(sext i32 (256 * %start) to i64),+,256}<nsw>
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a shl should get the nuw flag.
+define void @test-shl-nuw(float* %input, i32 %numIterations) {
+; CHECK-LABEL: @test-shl-nuw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,512}<nuw>
+ %index32 = shl nuw i32 %i, 9
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nuw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Example where a sub should *not* get the nsw flag, because of how
+; scalar evolution represents A - B as A + (-1)*B, and (-1)*B can wrap
+; even in cases where A - B does not.
+define void @test-sub-no-nsw(float* %input, i32 %start, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-no-nsw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {((-1 * %sub) + %start),+,1}<nw>
+ %index32 = sub nsw i32 %i, %sub
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a sub should get the nsw flag because the RHS cannot be
+; the minimum signed value.
+define void @test-sub-nsw(float* %input, i32 %start, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-nsw
+entry:
+ %halfsub = ashr i32 %sub, 1
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {((-1 * %halfsub)<nsw> + %start),+,1}<nsw>
+ %index32 = sub nsw i32 %i, %halfsub
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a sub should get the nsw flag, since the LHS is non-negative,
+; which implies that the RHS cannot be the minimum signed value.
+define void @test-sub-nsw-lhs-non-negative(float* %input, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-nsw-lhs-non-negative
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {(-1 * %sub),+,1}<nsw>
+ %index32 = sub nsw i32 %i, %sub
+
+; CHECK: %index64 =
+; CHECK: --> {(sext i32 (-1 * %sub) to i64),+,1}<nsw>
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Two adds with a sub in the middle and the sub should have nsw. There is
+; a special case for sequential adds/subs and this test covers that. We have to
+; put the final add first in the program since otherwise the special case
+; is not triggered, hence the strange basic block ordering.
+define void @test-sub-with-add(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-with-add
+entry:
+ br label %loop
+loop2:
+; CHECK: %seq =
+; CHECK: --> {(2 + (-1 * %offset)),+,1}<nw>
+ %seq = add nsw nuw i32 %index32, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+
+ %j = add nsw i32 %i, 1
+; CHECK: %index32 =
+; CHECK: --> {(1 + (-1 * %offset)),+,1}<nsw>
+ %index32 = sub nsw i32 %j, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ store float 1.0, float* %ptr, align 4
+ br label %loop2
+exit:
+ ret void
+}
+
+
+; Subtraction of two recurrences. The addition in the SCEV that this
+; maps to is NSW, but the negation of the RHS does not get NSW since
+; that recurrence could be the minimum representable signed value.
+define void @subrecurrences(i32 %outer_l, i32 %inner_l, i32 %val) {
+; CHECK-LABEL: @subrecurrences
+ entry:
+ br label %outer
+
+outer:
+ %o_idx = phi i32 [ 0, %entry ], [ %o_idx.inc, %outer.be ]
+ %o_idx.inc = add nsw i32 %o_idx, 1
+ %cond = icmp eq i32 %o_idx, %val
+ br i1 %cond, label %inner, label %outer.be
+
+inner:
+ %i_idx = phi i32 [ 0, %outer ], [ %i_idx.inc, %inner ]
+ %i_idx.inc = add nsw i32 %i_idx, 1
+; CHECK: %v =
+; CHECK-NEXT: --> {{[{][{]}}-1,+,-1}<nw><%outer>,+,1}<nsw><%inner>
+ %v = sub nsw i32 %i_idx, %o_idx.inc
+ %forub = udiv i32 1, %v
+ %cond2 = icmp eq i32 %i_idx, %inner_l
+ br i1 %cond2, label %outer.be, label %inner
+
+outer.be:
+ %cond3 = icmp eq i32 %o_idx, %outer_l
+ br i1 %cond3, label %exit, label %outer
+
+exit:
+ ret void
+}
%tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6
; min(N, i+3)
; CHECK: select i1 %tmp4, i64 %tmp5, i64 %tmp6
-; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nw><%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64))))))
+; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nw><%bb1> to i64))<nsw>) smax (-1 + (-1 * (sext i32 %N to i64))<nsw>)))<nsw>)
%tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9
%tmp12 = load i32, i32* %tmp11, align 4
%tmp13 = shl nsw i32 %tmp12, 1
; instruction to the SCEV, preventing distributing sext into the
; corresponding addrec.
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i + offset];
+;
define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
; CHECK-LABEL: @testadd
; CHECK: sext i32 %offset to i64
exit:
ret float %nextsum
}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i - offset];
+;
+define float @testsub(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @testsub
+; CHECK: sub i32 0, %offset
+; CHECK: sext i32
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = sub nuw nsw i32 %i, %offset
+ %index64 = sext i32 %index32 to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i * stride];
+;
+define float @testmul(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @testmul
+; CHECK: sext i32 %stride to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = mul nuw nsw i32 %i, %stride
+ %index64 = sext i32 %index32 to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[3 * (i << 7)];
+;
+; The multiplication by 3 is to make the address calculation expensive
+; enough to force the introduction of a pointer induction variable.
+define float @testshl(float* %input, i32 %numIterations) {
+; CHECK-LABEL: @testshl
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = shl nuw nsw i32 %i, 7
+ %index32mul = mul nuw nsw i32 %index32, 3
+ %index64 = sext i32 %index32mul to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}