/// getNegativeSCEV - Return the SCEV object corresponding to -V.
///
- const SCEV *getNegativeSCEV(const SCEV *V);
+ const SCEV *getNegativeSCEV(const SCEV *V,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
/// getNotSCEV - Return the SCEV object corresponding to ~V.
///
/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
///
-const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
+ SCEV::NoWrapFlags Flags) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
- return getMulExpr(V,
- getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+ return getMulExpr(
+ V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
}
/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags) {
- assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
-
// Fast path: X - X --> 0.
if (LHS == RHS)
return getConstant(LHS->getType(), 0);
- // X - Y --> X + -Y.
- // X -(nsw || nuw) Y --> X + -Y.
- return getAddExpr(LHS, getNegativeSCEV(RHS));
+ // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
+ // makes it so that we cannot make much use of NUW.
+ auto AddFlags = SCEV::FlagAnyWrap;
+ const bool RHSIsNotMinSigned =
+ !getSignedRange(RHS).getSignedMin().isMinSignedValue();
+ if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
+ // Let M be the minimum representable signed value. Then (-1)*RHS
+ // signed-wraps if and only if RHS is M. That can happen even for
+ // an NSW subtraction because e.g. (-1)*M signed-wraps even though
+ // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
+ // (-1)*RHS, we need to prove that RHS != M.
+ //
+ // If LHS is non-negative and we know that LHS - RHS does not
+ // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
+ // either by proving that RHS > M or that LHS >= 0.
+ if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
+ AddFlags = SCEV::FlagNSW;
+ }
+ }
+
+ // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS -
+ // RHS is NSW and LHS >= 0.
+ //
+ // The difficulty here is that the NSW flag may have been proven
+ // relative to a loop that is to be found in a recurrence in LHS and
+ // not in RHS. Applying NSW to (-1)*M may then let the NSW have a
+ // larger scope than intended.
+ auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+
+ return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
}
/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
}
SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
+ if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
const BinaryOperator *BinOp = cast<BinaryOperator>(V);
// Return early if there are no flags to propagate to the SCEV.
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
- //
- // FIXME: Expand this handling of NSW and NUW to other instructions, like
- // sub and mul.
SmallVector<const SCEV *, 4> AddOps;
for (Value *Op = U;; Op = U->getOperand(0)) {
U = dyn_cast<Operator>(Op);
break;
}
- if (auto *OpSCEV = getExistingSCEV(Op)) {
+ if (auto *OpSCEV = getExistingSCEV(U)) {
AddOps.push_back(OpSCEV);
break;
}
// since the flags are only known to apply to this particular
// addition - they may not apply to other additions that can be
// formed with operands from AddOps.
- //
- // FIXME: Expand this to sub instructions.
- if (Opcode == Instruction::Add && isa<BinaryOperator>(U)) {
- SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
- if (Flags != SCEV::FlagAnyWrap) {
- AddOps.push_back(getAddExpr(getSCEV(U->getOperand(0)),
- getSCEV(U->getOperand(1)), Flags));
- break;
- }
+ const SCEV *RHS = getSCEV(U->getOperand(1));
+ SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+ if (Flags != SCEV::FlagAnyWrap) {
+ const SCEV *LHS = getSCEV(U->getOperand(0));
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
+ else
+ AddOps.push_back(getAddExpr(LHS, RHS, Flags));
+ break;
}
- const SCEV *Op1 = getSCEV(U->getOperand(1));
if (Opcode == Instruction::Sub)
- AddOps.push_back(getNegativeSCEV(Op1));
+ AddOps.push_back(getNegativeSCEV(RHS));
else
- AddOps.push_back(Op1);
+ AddOps.push_back(RHS);
}
return getAddExpr(AddOps);
}
case Instruction::Mul: {
- // FIXME: Transfer NSW/NUW as in AddExpr.
SmallVector<const SCEV *, 4> MulOps;
- MulOps.push_back(getSCEV(U->getOperand(1)));
- for (Value *Op = U->getOperand(0);
- Op->getValueID() == Instruction::Mul + Value::InstructionVal;
- Op = U->getOperand(0)) {
- U = cast<Operator>(Op);
+ for (Value *Op = U;; Op = U->getOperand(0)) {
+ U = dyn_cast<Operator>(Op);
+ if (!U || U->getOpcode() != Instruction::Mul) {
+ assert(Op != V && "V should be a mul");
+ MulOps.push_back(getSCEV(Op));
+ break;
+ }
+
+ if (auto *OpSCEV = getExistingSCEV(U)) {
+ MulOps.push_back(OpSCEV);
+ break;
+ }
+
+ SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+ if (Flags != SCEV::FlagAnyWrap) {
+ MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)),
+ getSCEV(U->getOperand(1)), Flags));
+ break;
+ }
+
MulOps.push_back(getSCEV(U->getOperand(1)));
}
- MulOps.push_back(getSCEV(U->getOperand(0)));
return getMulExpr(MulOps);
}
case Instruction::UDiv:
return getUDivExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::Sub:
- return getMinusSCEV(getSCEV(U->getOperand(0)),
- getSCEV(U->getOperand(1)));
+ return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)),
+ getNoWrapFlagsFromUB(U));
case Instruction::And:
// For an expression like x&255 that merely masks off the high bits,
// use zext(trunc(x)) as the SCEV expression.
if (SA->getValue().uge(BitWidth))
break;
+ // It is currently not resolved how to interpret NSW for left
+ // shift by BitWidth - 1, so we avoid applying flags in that
+ // case. Remove this check (or this comment) once the situation
+ // is resolved. See
+ // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
+ // and http://reviews.llvm.org/D8890 .
+ auto Flags = SCEV::FlagAnyWrap;
+ if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U);
+
Constant *X = ConstantInt::get(getContext(),
APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
- return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+ return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags);
}
break;
; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k>
; CHECK: Base offset: %A
; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes.
-; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>]
+; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<nw><%for.k>]
define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 {
entry:
exit:
ret void
}
+
+; Example where a mul should get the nsw flag, so that a sext can be
+; distributed over the mul.
+define void @test-mul-nsw(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @test-mul-nsw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,%stride}<nsw>
+ %index32 = mul nsw i32 %i, %stride
+
+; CHECK: %index64 =
+; CHECK: --> {0,+,(sext i32 %stride to i64)}<nsw>
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a mul should get the nuw flag.
+define void @test-mul-nuw(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @test-mul-nuw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,%stride}<nuw>
+ %index32 = mul nuw i32 %i, %stride
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nuw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Example where a shl should get the nsw flag, so that a sext can be
+; distributed over the shl.
+define void @test-shl-nsw(float* %input, i32 %start, i32 %numIterations) {
+; CHECK-LABEL: @test-shl-nsw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {(256 * %start),+,256}<nsw>
+ %index32 = shl nsw i32 %i, 8
+
+; CHECK: %index64 =
+; CHECK: --> {(sext i32 (256 * %start) to i64),+,256}<nsw>
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a shl should get the nuw flag.
+define void @test-shl-nuw(float* %input, i32 %numIterations) {
+; CHECK-LABEL: @test-shl-nuw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,512}<nuw>
+ %index32 = shl nuw i32 %i, 9
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nuw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Example where a sub should *not* get the nsw flag, because of how
+; scalar evolution represents A - B as A + (-1)*B, and (-1)*B can wrap
+; even in cases where A - B does not.
+define void @test-sub-no-nsw(float* %input, i32 %start, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-no-nsw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {((-1 * %sub) + %start),+,1}<nw>
+ %index32 = sub nsw i32 %i, %sub
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a sub should get the nsw flag because the RHS cannot be
+; the minimum signed value.
+define void @test-sub-nsw(float* %input, i32 %start, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-nsw
+entry:
+ %halfsub = ashr i32 %sub, 1
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {((-1 * %halfsub)<nsw> + %start),+,1}<nsw>
+ %index32 = sub nsw i32 %i, %halfsub
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a sub should get the nsw flag, since the LHS is non-negative,
+; which implies that the RHS cannot be the minimum signed value.
+define void @test-sub-nsw-lhs-non-negative(float* %input, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-nsw-lhs-non-negative
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {(-1 * %sub),+,1}<nsw>
+ %index32 = sub nsw i32 %i, %sub
+
+; CHECK: %index64 =
+; CHECK: --> {(sext i32 (-1 * %sub) to i64),+,1}<nsw>
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Two adds with a sub in the middle and the sub should have nsw. There is
+; a special case for sequential adds/subs and this test covers that. We have to
+; put the final add first in the program since otherwise the special case
+; is not triggered, hence the strange basic block ordering.
+define void @test-sub-with-add(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-with-add
+entry:
+ br label %loop
+loop2:
+; CHECK: %seq =
+; CHECK: --> {(2 + (-1 * %offset)),+,1}<nw>
+ %seq = add nsw nuw i32 %index32, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+
+ %j = add nsw i32 %i, 1
+; CHECK: %index32 =
+; CHECK: --> {(1 + (-1 * %offset)),+,1}<nsw>
+ %index32 = sub nsw i32 %j, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ store float 1.0, float* %ptr, align 4
+ br label %loop2
+exit:
+ ret void
+}
+
+
+; Subtraction of two recurrences. The addition in the SCEV that this
+; maps to is NSW, but the negation of the RHS does not get NSW since
+; that recurrence could be the minimum representable signed value.
+define void @subrecurrences(i32 %outer_l, i32 %inner_l, i32 %val) {
+; CHECK-LABEL: @subrecurrences
+ entry:
+ br label %outer
+
+outer:
+ %o_idx = phi i32 [ 0, %entry ], [ %o_idx.inc, %outer.be ]
+ %o_idx.inc = add nsw i32 %o_idx, 1
+ %cond = icmp eq i32 %o_idx, %val
+ br i1 %cond, label %inner, label %outer.be
+
+inner:
+ %i_idx = phi i32 [ 0, %outer ], [ %i_idx.inc, %inner ]
+ %i_idx.inc = add nsw i32 %i_idx, 1
+; CHECK: %v =
+; CHECK-NEXT: --> {{[{][{]}}-1,+,-1}<nw><%outer>,+,1}<nsw><%inner>
+ %v = sub nsw i32 %i_idx, %o_idx.inc
+ %forub = udiv i32 1, %v
+ %cond2 = icmp eq i32 %i_idx, %inner_l
+ br i1 %cond2, label %outer.be, label %inner
+
+outer.be:
+ %cond3 = icmp eq i32 %o_idx, %outer_l
+ br i1 %cond3, label %exit, label %outer
+
+exit:
+ ret void
+}
%tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6
; min(N, i+3)
; CHECK: select i1 %tmp4, i64 %tmp5, i64 %tmp6
-; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nw><%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64))))))
+; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nw><%bb1> to i64))<nsw>) smax (-1 + (-1 * (sext i32 %N to i64))<nsw>)))<nsw>)
%tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9
%tmp12 = load i32, i32* %tmp11, align 4
%tmp13 = shl nsw i32 %tmp12, 1
; instruction to the SCEV, preventing distributing sext into the
; corresponding addrec.
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i + offset];
+;
define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
; CHECK-LABEL: @testadd
; CHECK: sext i32 %offset to i64
exit:
ret float %nextsum
}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i - offset];
+;
+define float @testsub(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @testsub
+; CHECK: sub i32 0, %offset
+; CHECK: sext i32
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = sub nuw nsw i32 %i, %offset
+ %index64 = sext i32 %index32 to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i * stride];
+;
+define float @testmul(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @testmul
+; CHECK: sext i32 %stride to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = mul nuw nsw i32 %i, %stride
+ %index64 = sext i32 %index32 to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[3 * (i << 7)];
+;
+; The multiplication by 3 is to make the address calculation expensive
+; enough to force the introduction of a pointer induction variable.
+define float @testshl(float* %input, i32 %numIterations) {
+; CHECK-LABEL: @testshl
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = shl nuw nsw i32 %i, 7
+ %index32mul = mul nuw nsw i32 %index32, 3
+ %index64 = sext i32 %index32mul to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}