Add support for runtime languages on our forward declarations.

[oota-llvm.git] / lib / Analysis / ScalarEvolution.cpp
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp

index bab4619894c76846c0adcb7f06a2623d10639cde..0c0ceeb951624cb6d34ec14bc8056952137b664e 100644 (file)
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -74,6 +74,7 @@
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/Assembly/Writer.h"
  #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/ConstantRange.h"
  #include "llvm/Support/Debug.h"
@@ -108,6 +109,7 @@ INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
                  "Scalar Evolution Analysis", false, true)
  INITIALIZE_PASS_DEPENDENCY(LoopInfo)
  INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
  INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
                  "Scalar Evolution Analysis", false, true)
  char ScalarEvolution::ID = 0;
@@ -188,6 +190,14 @@ void SCEV::print(raw_ostream &OS) const {
          OS << OpStr;
      }
      OS << ")";
+    switch (NAry->getSCEVType()) {
+    case scAddExpr:
+    case scMulExpr:
+      if (NAry->getNoWrapFlags(FlagNUW))
+        OS << "<nuw>";
+      if (NAry->getNoWrapFlags(FlagNSW))
+        OS << "<nsw>";
+    }
      return;
    }
    case scUDivExpr: {
@@ -197,7 +207,7 @@ void SCEV::print(raw_ostream &OS) const {
    }
    case scUnknown: {
      const SCEVUnknown *U = cast<SCEVUnknown>(this);
-    const Type *AllocTy;
+    Type *AllocTy;
      if (U->isSizeOf(AllocTy)) {
        OS << "sizeof(" << *AllocTy << ")";
        return;
@@ -207,7 +217,7 @@ void SCEV::print(raw_ostream &OS) const {
        return;
      }
  
-    const Type *CTy;
+    Type *CTy;
      Constant *FieldNo;
      if (U->isOffsetOf(CTy, FieldNo)) {
        OS << "offsetof(" << *CTy << ", ";
@@ -228,7 +238,7 @@ void SCEV::print(raw_ostream &OS) const {
    llvm_unreachable("Unknown SCEV kind!");
  }
  
-const Type *SCEV::getType() const {
+Type *SCEV::getType() const {
    switch (getSCEVType()) {
    case scConstant:
      return cast<SCEVConstant>(this)->getType();
@@ -249,11 +259,9 @@ const Type *SCEV::getType() const {
      return cast<SCEVUnknown>(this)->getType();
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-    return 0;
-  default: break;
+  default:
+    llvm_unreachable("Unknown SCEV kind!");
    }
-  llvm_unreachable("Unknown SCEV kind!");
-  return 0;
  }
  
  bool SCEV::isZero() const {
@@ -274,6 +282,20 @@ bool SCEV::isAllOnesValue() const {
    return false;
  }
  
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+bool SCEV::isNonConstantNegative() const {
+  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
+  if (!Mul) return false;
+
+  // If there is a constant factor, it will be first.
+  const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+  if (!SC) return false;
+
+  // Return true if the value is negative, this matches things like (-42 * V).
+  return SC->getValue()->getValue().isNegative();
+}
+
  SCEVCouldNotCompute::SCEVCouldNotCompute() :
    SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
  
@@ -297,17 +319,17 @@ const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
  }
  
  const SCEV *
-ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
-  const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
+ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
+  IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
    return getConstant(ConstantInt::get(ITy, V, isSigned));
  }
  
  SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
-                           unsigned SCEVTy, const SCEV *op, const Type *ty)
+                           unsigned SCEVTy, const SCEV *op, Type *ty)
    : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
  
  SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
-                                   const SCEV *op, const Type *ty)
+                                   const SCEV *op, Type *ty)
    : SCEVCastExpr(ID, scTruncate, op, ty) {
    assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
           (Ty->isIntegerTy() || Ty->isPointerTy()) &&
@@ -315,7 +337,7 @@ SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
  }
  
  SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
-                                       const SCEV *op, const Type *ty)
+                                       const SCEV *op, Type *ty)
    : SCEVCastExpr(ID, scZeroExtend, op, ty) {
    assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
           (Ty->isIntegerTy() || Ty->isPointerTy()) &&
@@ -323,7 +345,7 @@ SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
  }
  
  SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
-                                       const SCEV *op, const Type *ty)
+                                       const SCEV *op, Type *ty)
    : SCEVCastExpr(ID, scSignExtend, op, ty) {
    assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
           (Ty->isIntegerTy() || Ty->isPointerTy()) &&
@@ -354,7 +376,7 @@ void SCEVUnknown::allUsesReplacedWith(Value *New) {
    setValPtr(New);
  }
  
-bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
+bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
    if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
      if (VCE->getOpcode() == Instruction::PtrToInt)
        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
@@ -371,15 +393,15 @@ bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
    return false;
  }
  
-bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
+bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
    if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
      if (VCE->getOpcode() == Instruction::PtrToInt)
        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
          if (CE->getOpcode() == Instruction::GetElementPtr &&
              CE->getOperand(0)->isNullValue()) {
-          const Type *Ty =
+          Type *Ty =
              cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
-          if (const StructType *STy = dyn_cast<StructType>(Ty))
+          if (StructType *STy = dyn_cast<StructType>(Ty))
              if (!STy->isPacked() &&
                  CE->getNumOperands() == 3 &&
                  CE->getOperand(1)->isNullValue()) {
@@ -396,7 +418,7 @@ bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
    return false;
  }
  
-bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
+bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
    if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
      if (VCE->getOpcode() == Instruction::PtrToInt)
        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
@@ -404,7 +426,7 @@ bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
              CE->getNumOperands() == 3 &&
              CE->getOperand(0)->isNullValue() &&
              CE->getOperand(1)->isNullValue()) {
-          const Type *Ty =
+          Type *Ty =
              cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
            // Ignore vector types here so that ScalarEvolutionExpander doesn't
            // emit getelementptrs that index into vectors.
@@ -587,11 +609,8 @@ namespace {
        }
  
        default:
-        break;
+        llvm_unreachable("Unknown SCEV kind!");
        }
-
-      llvm_unreachable("Unknown SCEV kind!");
-      return 0;
      }
    };
  }
@@ -652,7 +671,7 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
  /// Assume, K > 0.
  static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
                                         ScalarEvolution &SE,
-                                       const Type* ResultTy) {
+                                       Type *ResultTy) {
    // Handle the simplest case efficiently.
    if (K == 1)
      return SE.getTruncateOrZeroExtend(It, ResultTy);
@@ -742,7 +761,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
    MultiplyFactor = MultiplyFactor.trunc(W);
  
    // Calculate the product, at width T+W
-  const IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
+  IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
                                                        CalculationBits);
    const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
    for (unsigned i = 1; i != K; ++i) {
@@ -790,7 +809,7 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
  //===----------------------------------------------------------------------===//
  
  const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
-                                             const Type *Ty) {
+                                             Type *Ty) {
    assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
           "This is not a truncating conversion!");
    assert(isSCEVable(Ty) &&
@@ -877,7 +896,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
  }
  
  const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
-                                               const Type *Ty) {
+                                               Type *Ty) {
    assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
           "This is not an extending conversion!");
    assert(isSCEVable(Ty) &&
@@ -954,7 +973,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
          const SCEV *RecastedMaxBECount =
            getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
          if (MaxBECount == RecastedMaxBECount) {
-          const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+          Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
            // Check whether Start+Step*MaxBECount has no unsigned overflow.
            const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
            const SCEV *Add = getAddExpr(Start, ZMul);
@@ -1035,8 +1054,107 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
    return S;
  }
  
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+                                           ICmpInst::Predicate *Pred,
+                                           ScalarEvolution *SE) {
+  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+  if (SE->isKnownPositive(Step)) {
+    *Pred = ICmpInst::ICMP_SLT;
+    return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+                           SE->getSignedRange(Step).getSignedMax());
+  }
+  if (SE->isKnownNegative(Step)) {
+    *Pred = ICmpInst::ICMP_SGT;
+    return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+                       SE->getSignedRange(Step).getSignedMin());
+  }
+  return 0;
+}
+
+// The recurrence AR has been shown to have no signed wrap. Typically, if we can
+// prove NSW for AR, then we can just as easily prove NSW for its preincrement
+// or postincrement sibling. This allows normalizing a sign extended AddRec as
+// such: {sext(Step + Start),+,Step} => {(Step + sext(Start),+,Step} As a
+// result, the expression "Step + sext(PreIncAR)" is congruent with
+// "sext(PostIncAR)"
+static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
+                                            Type *Ty,
+                                            ScalarEvolution *SE) {
+  const Loop *L = AR->getLoop();
+  const SCEV *Start = AR->getStart();
+  const SCEV *Step = AR->getStepRecurrence(*SE);
+
+  // Check for a simple looking step prior to loop entry.
+  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+  if (!SA)
+    return 0;
+
+  // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
+  // subtraction is expensive. For this purpose, perform a quick and dirty
+  // difference, by checking for Step in the operand list.
+  SmallVector<const SCEV *, 4> DiffOps;
+  for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end();
+       I != E; ++I) {
+    if (*I != Step)
+      DiffOps.push_back(*I);
+  }
+  if (DiffOps.size() == SA->getNumOperands())
+    return 0;
+
+  // This is a postinc AR. Check for overflow on the preinc recurrence using the
+  // same three conditions that getSignExtendedExpr checks.
+
+  // 1. NSW flags on the step increment.
+  const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
+  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+    SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+  if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
+    return PreStart;
+
+  // 2. Direct overflow check on the step operation's expression.
+  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+  Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+  const SCEV *OperandExtendedStart =
+    SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
+                   SE->getSignExtendExpr(Step, WideTy));
+  if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
+    // Cache knowledge of PreAR NSW.
+    if (PreAR)
+      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
+    // FIXME: this optimization needs a unit test
+    DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
+    return PreStart;
+  }
+
+  // 3. Loop precondition.
+  ICmpInst::Predicate Pred;
+  const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
+
+  if (OverflowLimit &&
+      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+    return PreStart;
+  }
+  return 0;
+}
+
+// Get the normalized sign-extended expression for this AddRec's Start.
+static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
+                                            Type *Ty,
+                                            ScalarEvolution *SE) {
+  const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
+  if (!PreStart)
+    return SE->getSignExtendExpr(AR->getStart(), Ty);
+
+  return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
+                        SE->getSignExtendExpr(PreStart, Ty));
+}
+
  const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
-                                               const Type *Ty) {
+                                               Type *Ty) {
    assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
           "This is not an extending conversion!");
    assert(isSCEVable(Ty) &&
@@ -1097,7 +1215,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
        // If we have special knowledge that this addrec won't overflow,
        // we don't need to do any further analysis.
        if (AR->getNoWrapFlags(SCEV::FlagNSW))
-        return getAddRecExpr(getSignExtendExpr(Start, Ty),
+        return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                               getSignExtendExpr(Step, Ty),
                               L, SCEV::FlagNSW);
  
@@ -1121,7 +1239,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
          const SCEV *RecastedMaxBECount =
            getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
          if (MaxBECount == RecastedMaxBECount) {
-          const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+          Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
            // Check whether Start+Step*MaxBECount has no signed overflow.
            const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
            const SCEV *Add = getAddExpr(Start, SMul);
@@ -1133,7 +1251,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
              // Cache knowledge of AR NSW, which is propagated to this AddRec.
              const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
              // Return the expression with the addrec on the outside.
-            return getAddRecExpr(getSignExtendExpr(Start, Ty),
+            return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                                   getSignExtendExpr(Step, Ty),
                                   L, AR->getNoWrapFlags());
            }
@@ -1149,7 +1267,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
              // Cache knowledge of AR NSW, which is propagated to this AddRec.
              const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
              // Return the expression with the addrec on the outside.
-            return getAddRecExpr(getSignExtendExpr(Start, Ty),
+            return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
                                   getZeroExtendExpr(Step, Ty),
                                   L, AR->getNoWrapFlags());
            }
@@ -1159,34 +1277,18 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
          // the addrec is safe. Also, if the entry is guarded by a comparison
          // with the start value and the backedge is guarded by a comparison
          // with the post-inc value, the addrec is safe.
-        if (isKnownPositive(Step)) {
-          const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
-                                      getSignedRange(Step).getSignedMax());
-          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
-              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
-               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
-                                           AR->getPostIncExpr(*this), N))) {
-            // Cache knowledge of AR NSW, which is propagated to this AddRec.
-            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
-            // Return the expression with the addrec on the outside.
-            return getAddRecExpr(getSignExtendExpr(Start, Ty),
-                                 getSignExtendExpr(Step, Ty),
-                                 L, AR->getNoWrapFlags());
-          }
-        } else if (isKnownNegative(Step)) {
-          const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
-                                      getSignedRange(Step).getSignedMin());
-          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
-              (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
-               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
-                                           AR->getPostIncExpr(*this), N))) {
-            // Cache knowledge of AR NSW, which is propagated to this AddRec.
-            const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
-            // Return the expression with the addrec on the outside.
-            return getAddRecExpr(getSignExtendExpr(Start, Ty),
-                                 getSignExtendExpr(Step, Ty),
-                                 L, AR->getNoWrapFlags());
-          }
+        ICmpInst::Predicate Pred;
+        const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+        if (OverflowLimit &&
+            (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
+             (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
+              isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
+                                          OverflowLimit)))) {
+          // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
+          const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+          return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+                               getSignExtendExpr(Step, Ty),
+                               L, AR->getNoWrapFlags());
          }
        }
      }
@@ -1204,7 +1306,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
  /// unspecified bits out to the given type.
  ///
  const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
-                                              const Type *Ty) {
+                                              Type *Ty) {
    assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
           "This is not an extending conversion!");
    assert(isSCEVable(Ty) &&
@@ -1367,7 +1469,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
    assert(!Ops.empty() && "Cannot get empty add!");
    if (Ops.size() == 1) return Ops[0];
  #ifndef NDEBUG
-  const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
    for (unsigned i = 1, e = Ops.size(); i != e; ++i)
      assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
             "SCEVAddExpr operand types don't match!");
@@ -1417,7 +1519,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
    // Okay, check to see if the same value occurs in the operand list more than
    // once.  If so, merge them together into an multiply expression.  Since we
    // sorted the list, these values are required to be adjacent.
-  const Type *Ty = Ops[0]->getType();
+  Type *Ty = Ops[0]->getType();
    bool FoundMatch = false;
    for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
      if (Ops[i] == Ops[i+1]) {      //  X + Y + Y  -->  X + Y*2
@@ -1444,8 +1546,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
    // if the contents of the resulting outer trunc fold to something simple.
    for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
      const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
-    const Type *DstType = Trunc->getType();
-    const Type *SrcType = Trunc->getOperand()->getType();
+    Type *DstType = Trunc->getType();
+    Type *SrcType = Trunc->getOperand()->getType();
      SmallVector<const SCEV *, 8> LargeOps;
      bool Ok = true;
      // Check all the operands to see if they can be represented in the
@@ -1664,7 +1766,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
        // If all of the other operands were loop invariant, we are done.
        if (Ops.size() == 1) return NewRec;
  
-      // Otherwise, add the folded AddRec by the non-liv parts.
+      // Otherwise, add the folded AddRec by the non-invariant parts.
        for (unsigned i = 0;; ++i)
          if (Ops[i] == AddRec) {
            Ops[i] = NewRec;
@@ -1729,6 +1831,38 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
    return S;
  }
  
+static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
+  uint64_t k = i*j;
+  if (j > 1 && k / j != i) Overflow = true;
+  return k;
+}
+
+/// Compute the result of "n choose k", the binomial coefficient.  If an
+/// intermediate computation overflows, Overflow will be set and the return will
+/// be garbage. Overflow is not cleared on absense of overflow.
+static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
+  // We use the multiplicative formula:
+  //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
+  // At each iteration, we take the n-th term of the numeral and divide by the
+  // (k-n)th term of the denominator.  This division will always produce an
+  // integral result, and helps reduce the chance of overflow in the
+  // intermediate computations. However, we can still overflow even when the
+  // final result would fit.
+
+  if (n == 0 || n == k) return 1;
+  if (k > n) return 0;
+
+  if (k > n/2)
+    k = n-k;
+
+  uint64_t r = 1;
+  for (uint64_t i = 1; i <= k; ++i) {
+    r = umul_ov(r, n-(i-1), Overflow);
+    r /= i;
+  }
+  return r;
+}
+
  /// getMulExpr - Get a canonical multiply expression, or something simpler if
  /// possible.
  const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
@@ -1738,7 +1872,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
    assert(!Ops.empty() && "Cannot get empty mul!");
    if (Ops.size() == 1) return Ops[0];
  #ifndef NDEBUG
-  const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
    for (unsigned i = 1, e = Ops.size(); i != e; ++i)
      assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
             "SCEVMulExpr operand types don't match!");
@@ -1889,7 +2023,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
        // If all of the other operands were loop invariant, we are done.
        if (Ops.size() == 1) return NewRec;
  
-      // Otherwise, multiply the folded AddRec by the non-liv parts.
+      // Otherwise, multiply the folded AddRec by the non-invariant parts.
        for (unsigned i = 0;; ++i)
          if (Ops[i] == AddRec) {
            Ops[i] = NewRec;
@@ -1903,31 +2037,65 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
      // multiplied together.  If so, we can fold them.
      for (unsigned OtherIdx = Idx+1;
           OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
-         ++OtherIdx)
+         ++OtherIdx) {
        if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
-        // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L>  -->
-        // {A*C,+,F*D + G*B + B*D}<L>
+        // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
+        // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
+        //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
+        //   ]]],+,...up to x=2n}.
+        // Note that the arguments to choose() are always integers with values
+        // known at compile time, never SCEV objects.
+        //
+        // The implementation avoids pointless extra computations when the two
+        // addrec's are of different length (mathematically, it's equivalent to
+        // an infinite stream of zeros on the right).
+        bool OpsModified = false;
          for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
               ++OtherIdx)
            if (const SCEVAddRecExpr *OtherAddRec =
                  dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
              if (OtherAddRec->getLoop() == AddRecLoop) {
-              const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
-              const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart());
-              const SCEV *B = F->getStepRecurrence(*this);
-              const SCEV *D = G->getStepRecurrence(*this);
-              const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
-                                               getMulExpr(G, B),
-                                               getMulExpr(B, D));
-              const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
-                                                    F->getLoop(),
-                                                    SCEV::FlagAnyWrap);
-              if (Ops.size() == 2) return NewAddRec;
-              Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
-              Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+              bool Overflow = false;
+              Type *Ty = AddRec->getType();
+              bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
+              SmallVector<const SCEV*, 7> AddRecOps;
+              for (int x = 0, xe = AddRec->getNumOperands() +
+                     OtherAddRec->getNumOperands() - 1;
+                   x != xe && !Overflow; ++x) {
+                const SCEV *Term = getConstant(Ty, 0);
+                for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
+                  uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
+                  for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
+                         ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
+                       z < ze && !Overflow; ++z) {
+                    uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
+                    uint64_t Coeff;
+                    if (LargerThan64Bits)
+                      Coeff = umul_ov(Coeff1, Coeff2, Overflow);
+                    else
+                      Coeff = Coeff1*Coeff2;
+                    const SCEV *CoeffTerm = getConstant(Ty, Coeff);
+                    const SCEV *Term1 = AddRec->getOperand(y-z);
+                    const SCEV *Term2 = OtherAddRec->getOperand(z);
+                    Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
+                  }
+                }
+                AddRecOps.push_back(Term);
+              }
+              if (!Overflow) {
+                const SCEV *NewAddRec = getAddRecExpr(AddRecOps,
+                                                      AddRec->getLoop(),
+                                                      SCEV::FlagAnyWrap);
+                if (Ops.size() == 2) return NewAddRec;
+                Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
+                Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+                OpsModified = true;
+              }
              }
-        return getMulExpr(Ops);
+        if (OpsModified)
+          return getMulExpr(Ops);
        }
+    }
  
      // Otherwise couldn't fold anything into this recurrence.  Move onto the
      // next one.
@@ -1971,21 +2139,22 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
        // Determine if the division can be folded into the operands of
        // its operands.
        // TODO: Generalize this to non-constants by using known-bits information.
-      const Type *Ty = LHS->getType();
+      Type *Ty = LHS->getType();
        unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
        unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
        // For non-power-of-two values, effectively round the value up to the
        // nearest power of two.
        if (!RHSC->getValue()->getValue().isPowerOf2())
          ++MaxShiftAmt;
-      const IntegerType *ExtTy =
+      IntegerType *ExtTy =
          IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
-      // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
        if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
          if (const SCEVConstant *Step =
-              dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
-          if (!Step->getValue()->getValue()
-                .urem(RHSC->getValue()->getValue()) &&
+            dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
+          // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
+          const APInt &StepInt = Step->getValue()->getValue();
+          const APInt &DivInt = RHSC->getValue()->getValue();
+          if (!StepInt.urem(DivInt) &&
                getZeroExtendExpr(AR, ExtTy) ==
                getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
                              getZeroExtendExpr(Step, ExtTy),
@@ -1996,6 +2165,22 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
              return getAddRecExpr(Operands, AR->getLoop(),
                                   SCEV::FlagNW);
            }
+          /// Get a canonical UDivExpr for a recurrence.
+          /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
+          // We can currently only fold X%N if X is constant.
+          const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
+          if (StartC && !DivInt.urem(StepInt) &&
+              getZeroExtendExpr(AR, ExtTy) ==
+              getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+                            getZeroExtendExpr(Step, ExtTy),
+                            AR->getLoop(), SCEV::FlagAnyWrap)) {
+            const APInt &StartInt = StartC->getValue()->getValue();
+            const APInt &StartRem = StartInt.urem(StepInt);
+            if (StartRem != 0)
+              LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
+                                  AR->getLoop(), SCEV::FlagNW);
+          }
+        }
        // (A*B)/C --> A*(B/C) if safe and B/C can be folded.
        if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
          SmallVector<const SCEV *, 4> Operands;
@@ -2080,7 +2265,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
                                 const Loop *L, SCEV::NoWrapFlags Flags) {
    if (Operands.size() == 1) return Operands[0];
  #ifndef NDEBUG
-  const Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
+  Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
    for (unsigned i = 1, e = Operands.size(); i != e; ++i)
      assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
             "SCEVAddRecExpr operand types don't match!");
@@ -2198,7 +2383,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
    assert(!Ops.empty() && "Cannot get empty smax!");
    if (Ops.size() == 1) return Ops[0];
  #ifndef NDEBUG
-  const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
    for (unsigned i = 1, e = Ops.size(); i != e; ++i)
      assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
             "SCEVSMaxExpr operand types don't match!");
@@ -2302,7 +2487,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
    assert(!Ops.empty() && "Cannot get empty umax!");
    if (Ops.size() == 1) return Ops[0];
  #ifndef NDEBUG
-  const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
    for (unsigned i = 1, e = Ops.size(); i != e; ++i)
      assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
             "SCEVUMaxExpr operand types don't match!");
@@ -2405,7 +2590,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
    return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
  }
  
-const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
    // If we have TargetData, we can bypass creating a target-independent
    // constant expression and then folding it back into a ConstantInt.
    // This is just a compile-time optimization.
@@ -2415,22 +2600,22 @@ const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
  
    Constant *C = ConstantExpr::getSizeOf(AllocTy);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
        C = Folded;
-  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+  Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
    return getTruncateOrZeroExtend(getSCEV(C), Ty);
  }
  
-const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
+const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
    Constant *C = ConstantExpr::getAlignOf(AllocTy);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
        C = Folded;
-  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+  Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
    return getTruncateOrZeroExtend(getSCEV(C), Ty);
  }
  
-const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
                                               unsigned FieldNo) {
    // If we have TargetData, we can bypass creating a target-independent
    // constant expression and then folding it back into a ConstantInt.
@@ -2441,19 +2626,19 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
  
    Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
        C = Folded;
-  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+  Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
    return getTruncateOrZeroExtend(getSCEV(C), Ty);
  }
  
-const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
                                               Constant *FieldNo) {
    Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
        C = Folded;
-  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+  Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
    return getTruncateOrZeroExtend(getSCEV(C), Ty);
  }
  
@@ -2487,14 +2672,14 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
  /// the SCEV framework. This primarily includes integer types, and it
  /// can optionally include pointer types if the ScalarEvolution class
  /// has access to target-specific information.
-bool ScalarEvolution::isSCEVable(const Type *Ty) const {
+bool ScalarEvolution::isSCEVable(Type *Ty) const {
    // Integers and pointers are always SCEVable.
    return Ty->isIntegerTy() || Ty->isPointerTy();
  }
  
  /// getTypeSizeInBits - Return the size in bits of the specified type,
  /// for which isSCEVable must return true.
-uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
+uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
    assert(isSCEVable(Ty) && "Type is not SCEVable!");
  
    // If we have a TargetData, use it!
@@ -2515,7 +2700,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
  /// the given type and which represents how SCEV will treat the given
  /// type, for which isSCEVable must return true. For pointer types,
  /// this is the pointer-sized integer type.
-const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
+Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
    assert(isSCEVable(Ty) && "Type is not SCEVable!");
  
    if (Ty->isIntegerTy())
@@ -2557,7 +2742,7 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
      return getConstant(
                 cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
  
-  const Type *Ty = V->getType();
+  Type *Ty = V->getType();
    Ty = getEffectiveSCEVType(Ty);
    return getMulExpr(V,
                    getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
@@ -2569,7 +2754,7 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
      return getConstant(
                  cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
  
-  const Type *Ty = V->getType();
+  Type *Ty = V->getType();
    Ty = getEffectiveSCEVType(Ty);
    const SCEV *AllOnes =
                     getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
@@ -2593,8 +2778,8 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
  /// input value to the specified type.  If the type must be extended, it is zero
  /// extended.
  const SCEV *
-ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) {
-  const Type *SrcTy = V->getType();
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
+  Type *SrcTy = V->getType();
    assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
           (Ty->isIntegerTy() || Ty->isPointerTy()) &&
           "Cannot truncate or zero extend with non-integer arguments!");
@@ -2610,8 +2795,8 @@ ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) {
  /// extended.
  const SCEV *
  ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
-                                         const Type *Ty) {
-  const Type *SrcTy = V->getType();
+                                         Type *Ty) {
+  Type *SrcTy = V->getType();
    assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
           (Ty->isIntegerTy() || Ty->isPointerTy()) &&
           "Cannot truncate or zero extend with non-integer arguments!");
@@ -2626,8 +2811,8 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
  /// input value to the specified type.  If the type must be extended, it is zero
  /// extended.  The conversion must not be narrowing.
  const SCEV *
-ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
-  const Type *SrcTy = V->getType();
+ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
+  Type *SrcTy = V->getType();
    assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
           (Ty->isIntegerTy() || Ty->isPointerTy()) &&
           "Cannot noop or zero extend with non-integer arguments!");
@@ -2642,8 +2827,8 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
  /// input value to the specified type.  If the type must be extended, it is sign
  /// extended.  The conversion must not be narrowing.
  const SCEV *
-ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
-  const Type *SrcTy = V->getType();
+ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
+  Type *SrcTy = V->getType();
    assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
           (Ty->isIntegerTy() || Ty->isPointerTy()) &&
           "Cannot noop or sign extend with non-integer arguments!");
@@ -2659,8 +2844,8 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
  /// it is extended with unspecified bits. The conversion must not be
  /// narrowing.
  const SCEV *
-ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
-  const Type *SrcTy = V->getType();
+ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
+  Type *SrcTy = V->getType();
    assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
           (Ty->isIntegerTy() || Ty->isPointerTy()) &&
           "Cannot noop or any extend with non-integer arguments!");
@@ -2674,8 +2859,8 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
  /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
  /// input value to the specified type.  The conversion must not be widening.
  const SCEV *
-ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
-  const Type *SrcTy = V->getType();
+ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
+  Type *SrcTy = V->getType();
    assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
           (Ty->isIntegerTy() || Ty->isPointerTy()) &&
           "Cannot truncate or noop with non-integer arguments!");
@@ -2942,7 +3127,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
    // PHI's incoming blocks are in a different loop, in which case doing so
    // risks breaking LCSSA form. Instcombine would normally zap these, but
    // it doesn't have DominatorTree information, so it may miss cases.
-  if (Value *V = SimplifyInstruction(PN, TD, DT))
+  if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
      if (LI->replacementPreservesLCSSAForm(PN, V))
        return getSCEV(V);
  
@@ -2961,7 +3146,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
    // context.
    bool isInBounds = GEP->isInBounds();
  
-  const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+  Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
    Value *Base = GEP->getOperand(0);
    // Don't attempt to analyze GEPs over unsized objects.
    if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
@@ -2973,7 +3158,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
         I != E; ++I) {
      Value *Index = *I;
      // Compute the (potentially symbolic) offset in bytes for this index.
-    if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+    if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
        // For a struct, add the member offset.
        unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
        const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
@@ -3173,7 +3358,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
  
      // TODO: non-affine addrec
      if (AddRec->isAffine()) {
-      const Type *Ty = AddRec->getType();
+      Type *Ty = AddRec->getType();
        const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
        if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
            getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
@@ -3325,7 +3510,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
  
      // TODO: non-affine addrec
      if (AddRec->isAffine()) {
-      const Type *Ty = AddRec->getType();
+      Type *Ty = AddRec->getType();
        const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
        if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
            getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
@@ -3418,6 +3603,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
      // because it leads to N-1 getAddExpr calls for N ultimate operands.
      // Instead, gather up all the operands and make a single getAddExpr call.
      // LLVM IR canonical form means we need only traverse the left operands.
+    //
+    // Don't apply this instruction's NSW or NUW flags to the new
+    // expression. The instruction may be guarded by control flow that the
+    // no-wrap behavior depends on. Non-control-equivalent instructions can be
+    // mapped to the same SCEV expression, and it would be incorrect to transfer
+    // NSW/NUW semantics to those operations.
      SmallVector<const SCEV *, 4> AddOps;
      AddOps.push_back(getSCEV(U->getOperand(1)));
      for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
@@ -3435,7 +3626,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
      return getAddExpr(AddOps);
    }
    case Instruction::Mul: {
-    // See the Add code above.
+    // Don't transfer NSW/NUW for the same reason as AddExpr.
      SmallVector<const SCEV *, 4> MulOps;
      MulOps.push_back(getSCEV(U->getOperand(1)));
      for (Value *Op = U->getOperand(0);
@@ -3530,9 +3721,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
                LCI->getValue() == CI->getValue())
              if (const SCEVZeroExtendExpr *Z =
                    dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
-              const Type *UTy = U->getType();
+              Type *UTy = U->getType();
                const SCEV *Z0 = Z->getOperand();
-              const Type *Z0Ty = Z0->getType();
+              Type *Z0Ty = Z0->getType();
                unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
  
                // If C is a low-bits mask, the zero extend is serving to
@@ -3742,6 +3933,79 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
  //                   Iteration Count Computation Code
  //
  
+/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
+/// normal unsigned value. Returns 0 if the trip count is unknown or not
+/// constant. Will also return 0 if the maximum trip count is very large (>=
+/// 2^32).
+///
+/// This "trip count" assumes that control exits via ExitingBlock. More
+/// precisely, it is the number of times that control may reach ExitingBlock
+/// before taking the branch. For loops with multiple exits, it may not be the
+/// number times that the loop header executes because the loop may exit
+/// prematurely via another branch.
+unsigned ScalarEvolution::
+getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) {
+  const SCEVConstant *ExitCount =
+    dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
+  if (!ExitCount)
+    return 0;
+
+  ConstantInt *ExitConst = ExitCount->getValue();
+
+  // Guard against huge trip counts.
+  if (ExitConst->getValue().getActiveBits() > 32)
+    return 0;
+
+  // In case of integer overflow, this returns 0, which is correct.
+  return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be the
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant, use getSmallConstantTripCount for that case), Will also return 1
+/// if the trip count is very large (>= 2^32).
+///
+/// As explained in the comments for getSmallConstantTripCount, this assumes
+/// that control exits the loop via ExitingBlock.
+unsigned ScalarEvolution::
+getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
+  const SCEV *ExitCount = getExitCount(L, ExitingBlock);
+  if (ExitCount == getCouldNotCompute())
+    return 1;
+
+  // Get the trip count from the BE count by adding 1.
+  const SCEV *TCMul = getAddExpr(ExitCount,
+                                 getConstant(ExitCount->getType(), 1));
+  // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
+  // to factor simple cases.
+  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
+    TCMul = Mul->getOperand(0);
+
+  const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
+  if (!MulC)
+    return 1;
+
+  ConstantInt *Result = MulC->getValue();
+
+  // Guard against huge trip counts.
+  if (!Result || Result->getValue().getActiveBits() > 32)
+    return 1;
+
+  return (unsigned)Result->getZExtValue();
+}
+
+// getExitCount - Get the expression for the number of loop iterations for which
+// this loop is guaranteed not to exit via ExitintBlock. Otherwise return
+// SCEVCouldNotCompute.
+const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
+  return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
+}
+
  /// getBackedgeTakenCount - If the specified loop has a predictable
  /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
  /// object. The backedge-taken count is the number of times the loop header
@@ -3754,14 +4018,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
  /// hasLoopInvariantBackedgeTakenCount).
  ///
  const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
-  return getBackedgeTakenInfo(L).Exact;
+  return getBackedgeTakenInfo(L).getExact(this);
  }
  
  /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
  /// return the least SCEV value that is known never to be less than the
  /// actual backedge taken count.
  const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
-  return getBackedgeTakenInfo(L).Max;
+  return getBackedgeTakenInfo(L).getMax(this);
  }
  
  /// PushLoopPHIs - Push PHI nodes in the header of the given loop
@@ -3778,33 +4042,31 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
  
  const ScalarEvolution::BackedgeTakenInfo &
  ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
-  // Initially insert a CouldNotCompute for this loop. If the insertion
+  // Initially insert an invalid entry for this loop. If the insertion
    // succeeds, proceed to actually compute a backedge-taken count and
    // update the value. The temporary CouldNotCompute value tells SCEV
    // code elsewhere that it shouldn't attempt to request a new
    // backedge-taken count, which could result in infinite recursion.
-  std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
-    BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+  std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+    BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
    if (!Pair.second)
      return Pair.first->second;
  
-  BackedgeTakenInfo Result = getCouldNotCompute();
-  BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L);
-  if (Computed.Exact != getCouldNotCompute()) {
-    assert(isLoopInvariant(Computed.Exact, L) &&
-           isLoopInvariant(Computed.Max, L) &&
+  // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
+  // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
+  // must be cleared in this scope.
+  BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
+
+  if (Result.getExact(this) != getCouldNotCompute()) {
+    assert(isLoopInvariant(Result.getExact(this), L) &&
+           isLoopInvariant(Result.getMax(this), L) &&
             "Computed backedge-taken count isn't loop invariant for loop!");
      ++NumTripCountsComputed;
-
-    // Update the value in the map.
-    Result = Computed;
-  } else {
-    if (Computed.Max != getCouldNotCompute())
-      // Update the value in the map.
-      Result = Computed;
-    if (isa<PHINode>(L->getHeader()->begin()))
-      // Only count loops that have phi nodes as not being computable.
-      ++NumTripCountsNotComputed;
+  }
+  else if (Result.getMax(this) == getCouldNotCompute() &&
+           isa<PHINode>(L->getHeader()->begin())) {
+    // Only count loops that have phi nodes as not being computable.
+    ++NumTripCountsNotComputed;
    }
  
    // Now that we know more about the trip count for this loop, forget any
@@ -3812,7 +4074,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
    // conservative estimates made without the benefit of trip count
    // information. This is similar to the code in forgetLoop, except that
    // it handles SCEVUnknown PHI nodes specially.
-  if (Computed.hasAnyInfo()) {
+  if (Result.hasAnyInfo()) {
      SmallVector<Instruction *, 16> Worklist;
      PushLoopPHIs(L, Worklist);
  
@@ -3857,7 +4119,12 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
  /// compute a trip count, or if the loop is deleted.
  void ScalarEvolution::forgetLoop(const Loop *L) {
    // Drop any stored trip count value.
-  BackedgeTakenCounts.erase(L);
+  DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
+    BackedgeTakenCounts.find(L);
+  if (BTCPos != BackedgeTakenCounts.end()) {
+    BTCPos->second.clear();
+    BackedgeTakenCounts.erase(BTCPos);
+  }
  
    // Drop information about expressions based on loop-header PHIs.
    SmallVector<Instruction *, 16> Worklist;
@@ -3913,6 +4180,91 @@ void ScalarEvolution::forgetValue(Value *V) {
    }
  }
  
+/// getExact - Get the exact loop backedge taken count considering all loop
+/// exits. A computable result can only be return for loops with a single exit.
+/// Returning the minimum taken count among all exits is incorrect because one
+/// of the loop's exit limit's may have been skipped. HowFarToZero assumes that
+/// the limit of each loop test is never skipped. This is a valid assumption as
+/// long as the loop exits via that test. For precise results, it is the
+/// caller's responsibility to specify the relevant loop exit using
+/// getExact(ExitingBlock, SE).
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
+  // If any exits were not computable, the loop is not computable.
+  if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
+
+  // We need exactly one computable exit.
+  if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
+  assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
+
+  const SCEV *BECount = 0;
+  for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+       ENT != 0; ENT = ENT->getNextExit()) {
+
+    assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
+
+    if (!BECount)
+      BECount = ENT->ExactNotTaken;
+    else if (BECount != ENT->ExactNotTaken)
+      return SE->getCouldNotCompute();
+  }
+  assert(BECount && "Invalid not taken count for loop exit");
+  return BECount;
+}
+
+/// getExact - Get the exact not taken count for this loop exit.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
+                                             ScalarEvolution *SE) const {
+  for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+       ENT != 0; ENT = ENT->getNextExit()) {
+
+    if (ENT->ExitingBlock == ExitingBlock)
+      return ENT->ExactNotTaken;
+  }
+  return SE->getCouldNotCompute();
+}
+
+/// getMax - Get the max backedge taken count for the loop.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
+  return Max ? Max : SE->getCouldNotCompute();
+}
+
+/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
+/// computable exit into a persistent ExitNotTakenInfo array.
+ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
+  SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
+  bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
+
+  if (!Complete)
+    ExitNotTaken.setIncomplete();
+
+  unsigned NumExits = ExitCounts.size();
+  if (NumExits == 0) return;
+
+  ExitNotTaken.ExitingBlock = ExitCounts[0].first;
+  ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
+  if (NumExits == 1) return;
+
+  // Handle the rare case of multiple computable exits.
+  ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
+
+  ExitNotTakenInfo *PrevENT = &ExitNotTaken;
+  for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
+    PrevENT->setNextExit(ENT);
+    ENT->ExitingBlock = ExitCounts[i].first;
+    ENT->ExactNotTaken = ExitCounts[i].second;
+  }
+}
+
+/// clear - Invalidate this result and free the ExitNotTakenInfo array.
+void ScalarEvolution::BackedgeTakenInfo::clear() {
+  ExitNotTaken.ExitingBlock = 0;
+  ExitNotTaken.ExactNotTaken = 0;
+  delete[] ExitNotTaken.getNextExit();
+}
+
  /// ComputeBackedgeTakenCount - Compute the number of times the backedge
  /// of the specified loop will execute.
  ScalarEvolution::BackedgeTakenInfo
@@ -3921,38 +4273,38 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
    L->getExitingBlocks(ExitingBlocks);
  
    // Examine all exits and pick the most conservative values.
-  const SCEV *BECount = getCouldNotCompute();
    const SCEV *MaxBECount = getCouldNotCompute();
-  bool CouldNotComputeBECount = false;
+  bool CouldComputeBECount = true;
+  SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
-    BackedgeTakenInfo NewBTI =
-      ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
-
-    if (NewBTI.Exact == getCouldNotCompute()) {
+    ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
+    if (EL.Exact == getCouldNotCompute())
        // We couldn't compute an exact value for this exit, so
        // we won't be able to compute an exact value for the loop.
-      CouldNotComputeBECount = true;
-      BECount = getCouldNotCompute();
-    } else if (!CouldNotComputeBECount) {
-      if (BECount == getCouldNotCompute())
-        BECount = NewBTI.Exact;
-      else
-        BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
-    }
+      CouldComputeBECount = false;
+    else
+      ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact));
+
      if (MaxBECount == getCouldNotCompute())
-      MaxBECount = NewBTI.Max;
-    else if (NewBTI.Max != getCouldNotCompute())
-      MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
+      MaxBECount = EL.Max;
+    else if (EL.Max != getCouldNotCompute()) {
+      // We cannot take the "min" MaxBECount, because non-unit stride loops may
+      // skip some loop tests. Taking the max over the exits is sufficiently
+      // conservative.  TODO: We could do better taking into consideration
+      // that (1) the loop has unit stride (2) the last loop test is
+      // less-than/greater-than (3) any loop test is less-than/greater-than AND
+      // falls-through some constant times less then the other tests.
+      MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
+    }
    }
  
-  return BackedgeTakenInfo(BECount, MaxBECount);
+  return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
  }
  
-/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge
-/// of the specified loop will execute if it exits via the specified block.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
-                                                   BasicBlock *ExitingBlock) {
+/// ComputeExitLimit - Compute the number of times the backedge of the specified
+/// loop will execute if it exits via the specified block.
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
  
    // Okay, we've chosen an exiting block.  See what condition causes us to
    // exit at this block.
@@ -4010,95 +4362,91 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
    }
  
    // Proceed to the next level to examine the exit condition expression.
-  return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
-                                               ExitBr->getSuccessor(0),
-                                               ExitBr->getSuccessor(1));
+  return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
+                                  ExitBr->getSuccessor(0),
+                                  ExitBr->getSuccessor(1));
  }
  
-/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
+/// ComputeExitLimitFromCond - Compute the number of times the
  /// backedge of the specified loop will execute if its exit condition
  /// were a conditional branch of ExitCond, TBB, and FBB.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
-                                                       Value *ExitCond,
-                                                       BasicBlock *TBB,
-                                                       BasicBlock *FBB) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
+                                          Value *ExitCond,
+                                          BasicBlock *TBB,
+                                          BasicBlock *FBB) {
    // Check if the controlling expression for this loop is an And or Or.
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
      if (BO->getOpcode() == Instruction::And) {
        // Recurse on the operands of the and.
-      BackedgeTakenInfo BTI0 =
-        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
-      BackedgeTakenInfo BTI1 =
-        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
+      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
        const SCEV *BECount = getCouldNotCompute();
        const SCEV *MaxBECount = getCouldNotCompute();
        if (L->contains(TBB)) {
          // Both conditions must be true for the loop to continue executing.
          // Choose the less conservative count.
-        if (BTI0.Exact == getCouldNotCompute() ||
-            BTI1.Exact == getCouldNotCompute())
+        if (EL0.Exact == getCouldNotCompute() ||
+            EL1.Exact == getCouldNotCompute())
            BECount = getCouldNotCompute();
          else
-          BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
-        if (BTI0.Max == getCouldNotCompute())
-          MaxBECount = BTI1.Max;
-        else if (BTI1.Max == getCouldNotCompute())
-          MaxBECount = BTI0.Max;
+          BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+        if (EL0.Max == getCouldNotCompute())
+          MaxBECount = EL1.Max;
+        else if (EL1.Max == getCouldNotCompute())
+          MaxBECount = EL0.Max;
          else
-          MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+          MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
        } else {
          // Both conditions must be true at the same time for the loop to exit.
          // For now, be conservative.
          assert(L->contains(FBB) && "Loop block has no successor in loop!");
-        if (BTI0.Max == BTI1.Max)
-          MaxBECount = BTI0.Max;
-        if (BTI0.Exact == BTI1.Exact)
-          BECount = BTI0.Exact;
+        if (EL0.Max == EL1.Max)
+          MaxBECount = EL0.Max;
+        if (EL0.Exact == EL1.Exact)
+          BECount = EL0.Exact;
        }
  
-      return BackedgeTakenInfo(BECount, MaxBECount);
+      return ExitLimit(BECount, MaxBECount);
      }
      if (BO->getOpcode() == Instruction::Or) {
        // Recurse on the operands of the or.
-      BackedgeTakenInfo BTI0 =
-        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
-      BackedgeTakenInfo BTI1 =
-        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
+      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
        const SCEV *BECount = getCouldNotCompute();
        const SCEV *MaxBECount = getCouldNotCompute();
        if (L->contains(FBB)) {
          // Both conditions must be false for the loop to continue executing.
          // Choose the less conservative count.
-        if (BTI0.Exact == getCouldNotCompute() ||
-            BTI1.Exact == getCouldNotCompute())
+        if (EL0.Exact == getCouldNotCompute() ||
+            EL1.Exact == getCouldNotCompute())
            BECount = getCouldNotCompute();
          else
-          BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
-        if (BTI0.Max == getCouldNotCompute())
-          MaxBECount = BTI1.Max;
-        else if (BTI1.Max == getCouldNotCompute())
-          MaxBECount = BTI0.Max;
+          BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+        if (EL0.Max == getCouldNotCompute())
+          MaxBECount = EL1.Max;
+        else if (EL1.Max == getCouldNotCompute())
+          MaxBECount = EL0.Max;
          else
-          MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+          MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
        } else {
          // Both conditions must be false at the same time for the loop to exit.
          // For now, be conservative.
          assert(L->contains(TBB) && "Loop block has no successor in loop!");
-        if (BTI0.Max == BTI1.Max)
-          MaxBECount = BTI0.Max;
-        if (BTI0.Exact == BTI1.Exact)
-          BECount = BTI0.Exact;
+        if (EL0.Max == EL1.Max)
+          MaxBECount = EL0.Max;
+        if (EL0.Exact == EL1.Exact)
+          BECount = EL0.Exact;
        }
  
-      return BackedgeTakenInfo(BECount, MaxBECount);
+      return ExitLimit(BECount, MaxBECount);
      }
    }
  
    // With an icmp, it may be feasible to compute an exact backedge-taken count.
    // Proceed to the next level to examine the icmp.
    if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
-    return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
+    return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB);
  
    // Check for a constant condition. These are normally stripped out by
    // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -4114,17 +4462,17 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
    }
  
    // If it's not an integer or pointer comparison then compute it the hard way.
-  return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+  return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
  }
  
-/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
+/// ComputeExitLimitFromICmp - Compute the number of times the
  /// backedge of the specified loop will execute if its exit condition
  /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
-                                                           ICmpInst *ExitCond,
-                                                           BasicBlock *TBB,
-                                                           BasicBlock *FBB) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
+                                          ICmpInst *ExitCond,
+                                          BasicBlock *TBB,
+                                          BasicBlock *FBB) {
  
    // If the condition was exit on true, convert the condition to exit on false
    ICmpInst::Predicate Cond;
@@ -4136,8 +4484,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
    // Handle common loops like: for (X = "string"; *X; ++X)
    if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
      if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
-      BackedgeTakenInfo ItCnt =
-        ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
+      ExitLimit ItCnt =
+        ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
        if (ItCnt.hasAnyInfo())
          return ItCnt;
      }
@@ -4176,36 +4524,36 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
    switch (Cond) {
    case ICmpInst::ICMP_NE: {                     // while (X != Y)
      // Convert to: while (X-Y != 0)
-    BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
-    if (BTI.hasAnyInfo()) return BTI;
+    ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+    if (EL.hasAnyInfo()) return EL;
      break;
    }
    case ICmpInst::ICMP_EQ: {                     // while (X == Y)
      // Convert to: while (X-Y == 0)
-    BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
-    if (BTI.hasAnyInfo()) return BTI;
+    ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+    if (EL.hasAnyInfo()) return EL;
      break;
    }
    case ICmpInst::ICMP_SLT: {
-    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
-    if (BTI.hasAnyInfo()) return BTI;
+    ExitLimit EL = HowManyLessThans(LHS, RHS, L, true);
+    if (EL.hasAnyInfo()) return EL;
      break;
    }
    case ICmpInst::ICMP_SGT: {
-    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+    ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
                                               getNotSCEV(RHS), L, true);
-    if (BTI.hasAnyInfo()) return BTI;
+    if (EL.hasAnyInfo()) return EL;
      break;
    }
    case ICmpInst::ICMP_ULT: {
-    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
-    if (BTI.hasAnyInfo()) return BTI;
+    ExitLimit EL = HowManyLessThans(LHS, RHS, L, false);
+    if (EL.hasAnyInfo()) return EL;
      break;
    }
    case ICmpInst::ICMP_UGT: {
-    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+    ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
                                               getNotSCEV(RHS), L, false);
-    if (BTI.hasAnyInfo()) return BTI;
+    if (EL.hasAnyInfo()) return EL;
      break;
    }
    default:
@@ -4219,8 +4567,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
  #endif
      break;
    }
-  return
-    ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+  return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
  }
  
  static ConstantInt *
@@ -4233,49 +4580,16 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
    return cast<SCEVConstant>(Val)->getValue();
  }
  
-/// GetAddressedElementFromGlobal - Given a global variable with an initializer
-/// and a GEP expression (missing the pointer index) indexing into it, return
-/// the addressed element of the initializer or null if the index expression is
-/// invalid.
-static Constant *
-GetAddressedElementFromGlobal(GlobalVariable *GV,
-                              const std::vector<ConstantInt*> &Indices) {
-  Constant *Init = GV->getInitializer();
-  for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
-    uint64_t Idx = Indices[i]->getZExtValue();
-    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
-      assert(Idx < CS->getNumOperands() && "Bad struct index!");
-      Init = cast<Constant>(CS->getOperand(Idx));
-    } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
-      if (Idx >= CA->getNumOperands()) return 0;  // Bogus program
-      Init = cast<Constant>(CA->getOperand(Idx));
-    } else if (isa<ConstantAggregateZero>(Init)) {
-      if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
-        assert(Idx < STy->getNumElements() && "Bad struct index!");
-        Init = Constant::getNullValue(STy->getElementType(Idx));
-      } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
-        if (Idx >= ATy->getNumElements()) return 0;  // Bogus program
-        Init = Constant::getNullValue(ATy->getElementType());
-      } else {
-        llvm_unreachable("Unknown constant aggregate type!");
-      }
-      return 0;
-    } else {
-      return 0; // Unknown initializer type
-    }
-  }
-  return Init;
-}
-
-/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
+/// ComputeLoadConstantCompareExitLimit - Given an exit condition of
  /// 'icmp op load X, cst', try to see if we can compute the backedge
  /// execution count.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
-                                                LoadInst *LI,
-                                                Constant *RHS,
-                                                const Loop *L,
-                                                ICmpInst::Predicate predicate) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeLoadConstantCompareExitLimit(
+  LoadInst *LI,
+  Constant *RHS,
+  const Loop *L,
+  ICmpInst::Predicate predicate) {
+
    if (LI->isVolatile()) return getCouldNotCompute();
  
    // Check to see if the loaded pointer is a getelementptr of a global.
@@ -4293,7 +4607,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
  
    // Okay, we allow one non-constant index into the GEP instruction.
    Value *VarIdx = 0;
-  std::vector<ConstantInt*> Indexes;
+  std::vector<Constant*> Indexes;
    unsigned VarIdxNum = 0;
    for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
      if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
@@ -4327,7 +4641,8 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
      // Form the GEP offset.
      Indexes[VarIdxNum] = Val;
  
-    Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+    Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
+                                                         Indexes);
      if (Result == 0) break;  // Cannot compute!
  
      // Evaluate the condition for this iteration.
@@ -4351,7 +4666,8 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
  /// specified type, assuming that all operands were constants.
  static bool CanConstantFold(const Instruction *I) {
    if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
-      isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
+      isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
+      isa<LoadInst>(I))
      return true;
  
    if (const CallInst *CI = dyn_cast<CallInst>(I))
@@ -4360,69 +4676,129 @@ static bool CanConstantFold(const Instruction *I) {
    return false;
  }
  
-/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
-/// in the loop that V is derived from.  We allow arbitrary operations along the
-/// way, but the operands of an operation must either be constants or a value
-/// derived from a constant PHI.  If this expression does not fit with these
-/// constraints, return null.
-static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
-  // If this is not an instruction, or if this is an instruction outside of the
-  // loop, it can't be derived from a loop PHI.
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0 || !L->contains(I)) return 0;
+/// Determine whether this instruction can constant evolve within this loop
+/// assuming its operands can all constant evolve.
+static bool canConstantEvolve(Instruction *I, const Loop *L) {
+  // An instruction outside of the loop can't be derived from a loop PHI.
+  if (!L->contains(I)) return false;
  
-  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+  if (isa<PHINode>(I)) {
      if (L->getHeader() == I->getParent())
-      return PN;
+      return true;
      else
        // We don't currently keep track of the control flow needed to evaluate
        // PHIs, so we cannot handle PHIs inside of loops.
-      return 0;
+      return false;
    }
  
    // If we won't be able to constant fold this expression even if the operands
-  // are constants, return early.
-  if (!CanConstantFold(I)) return 0;
+  // are constants, bail early.
+  return CanConstantFold(I);
+}
+
+/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
+/// recursing through each instruction operand until reaching a loop header phi.
+static PHINode *
+getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
+                               DenseMap<Instruction *, PHINode *> &PHIMap) {
  
    // Otherwise, we can evaluate this instruction if all of its operands are
    // constant or derived from a PHI node themselves.
    PHINode *PHI = 0;
-  for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
-    if (!isa<Constant>(I->getOperand(Op))) {
-      PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
-      if (P == 0) return 0;  // Not evolving from PHI
-      if (PHI == 0)
-        PHI = P;
-      else if (PHI != P)
-        return 0;  // Evolving from multiple different PHIs.
+  for (Instruction::op_iterator OpI = UseInst->op_begin(),
+         OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
+
+    if (isa<Constant>(*OpI)) continue;
+
+    Instruction *OpInst = dyn_cast<Instruction>(*OpI);
+    if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
+
+    PHINode *P = dyn_cast<PHINode>(OpInst);
+    if (!P)
+      // If this operand is already visited, reuse the prior result.
+      // We may have P != PHI if this is the deepest point at which the
+      // inconsistent paths meet.
+      P = PHIMap.lookup(OpInst);
+    if (!P) {
+      // Recurse and memoize the results, whether a phi is found or not.
+      // This recursive call invalidates pointers into PHIMap.
+      P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
+      PHIMap[OpInst] = P;
      }
-
+    if (P == 0) return 0;        // Not evolving from PHI
+    if (PHI && PHI != P) return 0;  // Evolving from multiple different PHIs.
+    PHI = P;
+  }
    // This is a expression evolving from a constant PHI!
    return PHI;
  }
  
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from.  We allow arbitrary operations along the
+/// way, but the operands of an operation must either be constants or a value
+/// derived from a constant PHI.  If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0 || !canConstantEvolve(I, L)) return 0;
+
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    return PN;
+  }
+
+  // Record non-constant instructions contained by the loop.
+  DenseMap<Instruction *, PHINode *> PHIMap;
+  return getConstantEvolvingPHIOperands(I, L, PHIMap);
+}
+
  /// EvaluateExpression - Given an expression that passes the
  /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
  /// in the loop has the value PHIVal.  If we can't fold this expression for some
  /// reason, return null.
-static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
-                                    const TargetData *TD) {
-  if (isa<PHINode>(V)) return PHIVal;
+static Constant *EvaluateExpression(Value *V, const Loop *L,
+                                    DenseMap<Instruction *, Constant *> &Vals,
+                                    const TargetData *TD,
+                                    const TargetLibraryInfo *TLI) {
+  // Convenient constant check, but redundant for recursive calls.
    if (Constant *C = dyn_cast<Constant>(V)) return C;
-  Instruction *I = cast<Instruction>(V);
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return 0;
+
+  if (Constant *C = Vals.lookup(I)) return C;
+
+  // An instruction inside the loop depends on a value outside the loop that we
+  // weren't given a mapping for, or a value such as a call inside the loop.
+  if (!canConstantEvolve(I, L)) return 0;
+
+  // An unmapped PHI can be due to a branch or another loop inside this loop,
+  // or due to this not being the initial iteration through a loop where we
+  // couldn't compute the evolution of this particular PHI last time.
+  if (isa<PHINode>(I)) return 0;
  
    std::vector<Constant*> Operands(I->getNumOperands());
  
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
-    Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
-    if (Operands[i] == 0) return 0;
+    Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
+    if (!Operand) {
+      Operands[i] = dyn_cast<Constant>(I->getOperand(i));
+      if (!Operands[i]) return 0;
+      continue;
+    }
+    Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
+    Vals[Operand] = C;
+    if (!C) return 0;
+    Operands[i] = C;
    }
  
-  if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+  if (CmpInst *CI = dyn_cast<CmpInst>(I))
      return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
-                                           Operands[1], TD);
-  return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
-                                  &Operands[0], Operands.size(), TD);
+                                           Operands[1], TD, TLI);
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    if (!LI->isVolatile())
+      return ConstantFoldLoadFromConstPtr(Operands[0], TD);
+  }
+  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
+                                  TLI);
  }
  
  /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
@@ -4433,7 +4809,7 @@ Constant *
  ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
                                                     const APInt &BEs,
                                                     const Loop *L) {
-  std::map<PHINode*, Constant*>::const_iterator I =
+  DenseMap<PHINode*, Constant*>::const_iterator I =
      ConstantEvolutionLoopExitValue.find(PN);
    if (I != ConstantEvolutionLoopExitValue.end())
      return I->second;
@@ -4443,19 +4819,26 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
  
    Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
  
+  DenseMap<Instruction *, Constant *> CurrentIterVals;
+  BasicBlock *Header = L->getHeader();
+  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
+
    // Since the loop is canonicalized, the PHI node must have two entries.  One
    // entry must be a constant (coming in from outside of the loop), and the
    // second must be derived from the same PHI.
    bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
-  Constant *StartCST =
-    dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
-  if (StartCST == 0)
-    return RetVal = 0;  // Must be a constant.
+  PHINode *PHI = 0;
+  for (BasicBlock::iterator I = Header->begin();
+       (PHI = dyn_cast<PHINode>(I)); ++I) {
+    Constant *StartCST =
+      dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+    if (StartCST == 0) continue;
+    CurrentIterVals[PHI] = StartCST;
+  }
+  if (!CurrentIterVals.count(PN))
+    return RetVal = 0;
  
    Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
-  if (getConstantEvolvingPHI(BEValue, L) != PN &&
-      !isa<Constant>(BEValue))
-    return RetVal = 0;  // Not derived from same PHI.
  
    // Execute the loop symbolically to determine the exit value.
    if (BEs.getActiveBits() >= 32)
@@ -4463,29 +4846,62 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
  
    unsigned NumIterations = BEs.getZExtValue(); // must be in range
    unsigned IterationNum = 0;
-  for (Constant *PHIVal = StartCST; ; ++IterationNum) {
+  for (; ; ++IterationNum) {
      if (IterationNum == NumIterations)
-      return RetVal = PHIVal;  // Got exit value!
+      return RetVal = CurrentIterVals[PN];  // Got exit value!
  
-    // Compute the value of the PHI node for the next iteration.
-    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
-    if (NextPHI == PHIVal)
-      return RetVal = NextPHI;  // Stopped evolving!
+    // Compute the value of the PHIs for the next iteration.
+    // EvaluateExpression adds non-phi values to the CurrentIterVals map.
+    DenseMap<Instruction *, Constant *> NextIterVals;
+    Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
+                                           TLI);
      if (NextPHI == 0)
        return 0;        // Couldn't evaluate!
-    PHIVal = NextPHI;
+    NextIterVals[PN] = NextPHI;
+
+    bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
+
+    // Also evaluate the other PHI nodes.  However, we don't get to stop if we
+    // cease to be able to evaluate one of them or if they stop evolving,
+    // because that doesn't necessarily prevent us from computing PN.
+    SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
+    for (DenseMap<Instruction *, Constant *>::const_iterator
+           I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
+      PHINode *PHI = dyn_cast<PHINode>(I->first);
+      if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
+      PHIsToCompute.push_back(std::make_pair(PHI, I->second));
+    }
+    // We use two distinct loops because EvaluateExpression may invalidate any
+    // iterators into CurrentIterVals.
+    for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
+             I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
+      PHINode *PHI = I->first;
+      Constant *&NextPHI = NextIterVals[PHI];
+      if (!NextPHI) {   // Not already computed.
+        Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
+        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+      }
+      if (NextPHI != I->second)
+        StoppedEvolving = false;
+    }
+
+    // If all entries in CurrentIterVals == NextIterVals then we can stop
+    // iterating, the loop can't continue to change.
+    if (StoppedEvolving)
+      return RetVal = CurrentIterVals[PN];
+
+    CurrentIterVals.swap(NextIterVals);
    }
  }
  
-/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// ComputeExitCountExhaustively - If the loop is known to execute a
  /// constant number of times (the condition evolves only from constants),
  /// try to evaluate a few iterations of the loop until we get the exit
  /// condition gets a value of ExitWhen (true or false).  If we cannot
  /// evaluate the trip count of the loop, return getCouldNotCompute().
-const SCEV *
-ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
-                                                       Value *Cond,
-                                                       bool ExitWhen) {
+const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
+                                                          Value *Cond,
+                                                          bool ExitWhen) {
    PHINode *PN = getConstantEvolvingPHI(Cond, L);
    if (PN == 0) return getCouldNotCompute();
  
@@ -4493,27 +4909,33 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
    // That's the only form we support here.
    if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
  
+  DenseMap<Instruction *, Constant *> CurrentIterVals;
+  BasicBlock *Header = L->getHeader();
+  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
+
    // One entry must be a constant (coming in from outside of the loop), and the
    // second must be derived from the same PHI.
    bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
-  Constant *StartCST =
-    dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
-  if (StartCST == 0) return getCouldNotCompute();  // Must be a constant.
-
-  Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
-  if (getConstantEvolvingPHI(BEValue, L) != PN &&
-      !isa<Constant>(BEValue))
-    return getCouldNotCompute();  // Not derived from same PHI.
+  PHINode *PHI = 0;
+  for (BasicBlock::iterator I = Header->begin();
+       (PHI = dyn_cast<PHINode>(I)); ++I) {
+    Constant *StartCST =
+      dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+    if (StartCST == 0) continue;
+    CurrentIterVals[PHI] = StartCST;
+  }
+  if (!CurrentIterVals.count(PN))
+    return getCouldNotCompute();
  
    // Okay, we find a PHI node that defines the trip count of this loop.  Execute
    // the loop symbolically to determine when the condition gets a value of
    // "ExitWhen".
-  unsigned IterationNum = 0;
+
    unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
-  for (Constant *PHIVal = StartCST;
-       IterationNum != MaxIterations; ++IterationNum) {
+  for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
      ConstantInt *CondVal =
-      dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
+      dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
+                                                       TD, TLI));
  
      // Couldn't symbolically evaluate.
      if (!CondVal) return getCouldNotCompute();
@@ -4523,11 +4945,29 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
        return getConstant(Type::getInt32Ty(getContext()), IterationNum);
      }
  
-    // Compute the value of the PHI node for the next iteration.
-    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
-    if (NextPHI == 0 || NextPHI == PHIVal)
-      return getCouldNotCompute();// Couldn't evaluate or not making progress...
-    PHIVal = NextPHI;
+    // Update all the PHI nodes for the next iteration.
+    DenseMap<Instruction *, Constant *> NextIterVals;
+
+    // Create a list of which PHIs we need to compute. We want to do this before
+    // calling EvaluateExpression on them because that may invalidate iterators
+    // into CurrentIterVals.
+    SmallVector<PHINode *, 8> PHIsToCompute;
+    for (DenseMap<Instruction *, Constant *>::const_iterator
+           I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
+      PHINode *PHI = dyn_cast<PHINode>(I->first);
+      if (!PHI || PHI->getParent() != Header) continue;
+      PHIsToCompute.push_back(PHI);
+    }
+    for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
+             E = PHIsToCompute.end(); I != E; ++I) {
+      PHINode *PHI = *I;
+      Constant *&NextPHI = NextIterVals[PHI];
+      if (NextPHI) continue;    // Already computed!
+
+      Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
+      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+    }
+    CurrentIterVals.swap(NextIterVals);
    }
  
    // Too many iterations were needed to evaluate.
@@ -4558,6 +4998,98 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
    return C;
  }
  
+/// This builds up a Constant using the ConstantExpr interface.  That way, we
+/// will return Constants for objects which aren't represented by a
+/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
+/// Returns NULL if the SCEV isn't representable as a Constant.
+static Constant *BuildConstantFromSCEV(const SCEV *V) {
+  switch (V->getSCEVType()) {
+    default:  // TODO: smax, umax.
+    case scCouldNotCompute:
+    case scAddRecExpr:
+      break;
+    case scConstant:
+      return cast<SCEVConstant>(V)->getValue();
+    case scUnknown:
+      return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
+    case scSignExtend: {
+      const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
+      if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
+        return ConstantExpr::getSExt(CastOp, SS->getType());
+      break;
+    }
+    case scZeroExtend: {
+      const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
+      if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
+        return ConstantExpr::getZExt(CastOp, SZ->getType());
+      break;
+    }
+    case scTruncate: {
+      const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
+      if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
+        return ConstantExpr::getTrunc(CastOp, ST->getType());
+      break;
+    }
+    case scAddExpr: {
+      const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
+      if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
+        if (C->getType()->isPointerTy())
+          C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
+        for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
+          Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
+          if (!C2) return 0;
+
+          // First pointer!
+          if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+            std::swap(C, C2);
+            // The offsets have been converted to bytes.  We can add bytes to an
+            // i8* by GEP with the byte count in the first index.
+            C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext()));
+          }
+
+          // Don't bother trying to sum two pointers. We probably can't
+          // statically compute a load that results from it anyway.
+          if (C2->getType()->isPointerTy())
+            return 0;
+
+          if (C->getType()->isPointerTy()) {
+            if (cast<PointerType>(C->getType())->getElementType()->isStructTy())
+              C2 = ConstantExpr::getIntegerCast(
+                  C2, Type::getInt32Ty(C->getContext()), true);
+            C = ConstantExpr::getGetElementPtr(C, C2);
+          } else
+            C = ConstantExpr::getAdd(C, C2);
+        }
+        return C;
+      }
+      break;
+    }
+    case scMulExpr: {
+      const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
+      if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
+        // Don't bother with pointers at all.
+        if (C->getType()->isPointerTy()) return 0;
+        for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
+          Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
+          if (!C2 || C2->getType()->isPointerTy()) return 0;
+          C = ConstantExpr::getMul(C, C2);
+        }
+        return C;
+      }
+      break;
+    }
+    case scUDivExpr: {
+      const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
+      if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
+        if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
+          if (LHS->getType() == RHS->getType())
+            return ConstantExpr::getUDiv(LHS, RHS);
+      break;
+    }
+  }
+  return 0;
+}
+
  const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
    if (isa<SCEVConstant>(V)) return V;
  
@@ -4610,11 +5142,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
            const SCEV *OpV = getSCEVAtScope(OrigV, L);
            MadeImprovement |= OrigV != OpV;
  
-          Constant *C = 0;
-          if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV))
-            C = SC->getValue();
-          if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV))
-            C = dyn_cast<Constant>(SU->getValue());
+          Constant *C = BuildConstantFromSCEV(OpV);
            if (!C) return V;
            if (C->getType() != Op->getType())
              C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
@@ -4629,10 +5157,14 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
            Constant *C = 0;
            if (const CmpInst *CI = dyn_cast<CmpInst>(I))
              C = ConstantFoldCompareInstOperands(CI->getPredicate(),
-                                                Operands[0], Operands[1], TD);
-          else
+                                                Operands[0], Operands[1], TD,
+                                                TLI);
+          else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+            if (!LI->isVolatile())
+              C = ConstantFoldLoadFromConstPtr(Operands[0], TD);
+          } else
              C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
-                                         &Operands[0], Operands.size(), TD);
+                                         Operands, TD, TLI);
            if (!C) return V;
            return getSCEV(C);
          }
@@ -4750,7 +5282,6 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
    }
  
    llvm_unreachable("Unknown SCEV type!");
-  return 0;
  }
  
  /// getSCEVAtScope - This is a convenience function which does
@@ -4854,7 +5385,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
      // Compute the two solutions for the quadratic formula.
      // The divisions must be performed as signed divisions.
      APInt NegB(-B);
-    APInt TwoA( A << 1 );
+    APInt TwoA(A << 1);
      if (TwoA.isMinValue()) {
        const SCEV *CNC = SE.getCouldNotCompute();
        return std::make_pair(CNC, CNC);
@@ -4869,7 +5400,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
  
      return std::make_pair(SE.getConstant(Solution1),
                            SE.getConstant(Solution2));
-    } // end APIntOps namespace
+  } // end APIntOps namespace
  }
  
  /// HowFarToZero - Return the number of times a backedge comparing the specified
@@ -4879,7 +5410,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
  /// now expressed as a single expression, V = x-y. So the exit test is
  /// effectively V != 0.  We know and take advantage of the fact that this
  /// expression only being used in a comparison by zero context.
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
  ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
    // If the value is a constant
    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
@@ -4963,8 +5494,19 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
    // Handle unitary steps, which cannot wraparound.
    // 1*N = -Start; -1*N = Start (mod 2^BW), so:
    //   N = Distance (as unsigned)
-  if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue())
-    return Distance;
+  if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
+    ConstantRange CR = getUnsignedRange(Start);
+    const SCEV *MaxBECount;
+    if (!CountDown && CR.getUnsignedMin().isMinValue())
+      // When counting up, the worst starting value is 1, not 0.
+      MaxBECount = CR.getUnsignedMax().isMinValue()
+        ? getConstant(APInt::getMinValue(CR.getBitWidth()))
+        : getConstant(APInt::getMaxValue(CR.getBitWidth()));
+    else
+      MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
+                                         : -CR.getUnsignedMin());
+    return ExitLimit(Distance, MaxBECount);
+  }
  
    // If the recurrence is known not to wraparound, unsigned divide computes the
    // back edge count. We know that the value will either become zero (and thus
@@ -4976,10 +5518,10 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
    // behavior. Loops must exhibit defined behavior until a wrapped value is
    // actually used. So the trip count computed by udiv could be smaller than the
    // number of well-defined iterations.
-  if (AddRec->getNoWrapFlags(SCEV::FlagNW))
+  if (AddRec->getNoWrapFlags(SCEV::FlagNW)) {
      // FIXME: We really want an "isexact" bit for udiv.
      return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
-
+  }
    // Then, try to solve the above equation provided that Start is constant.
    if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
      return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
@@ -4991,7 +5533,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
  /// HowFarToNonZero - Return the number of times a backedge checking the
  /// specified value for nonzero will execute.  If not computable, return
  /// CouldNotCompute
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
  ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
    // Loops that look like: while (X == 0) are very strange indeed.  We don't
    // handle them yet except for the trivial case.  This could be expanded in the
@@ -5370,7 +5912,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
    switch (Pred) {
    default:
      llvm_unreachable("Unexpected ICmpInst::Predicate value!");
-    break;
    case ICmpInst::ICMP_SGT:
      Pred = ICmpInst::ICMP_SLT;
      std::swap(LHS, RHS);
@@ -5670,7 +6211,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
    assert(!isKnownNegative(Step) &&
           "This code doesn't handle negative strides yet!");
  
-  const Type *Ty = Start->getType();
+  Type *Ty = Start->getType();
  
    // When Start == End, we have an exact BECount == 0. Short-circuit this case
    // here because SCEV may not be able to determine that the unsigned division
@@ -5689,7 +6230,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
    if (!NoWrap) {
      // Check Add for unsigned overflow.
      // TODO: More sophisticated things could be done here.
-    const Type *WideTy = IntegerType::get(getContext(),
+    Type *WideTy = IntegerType::get(getContext(),
                                            getTypeSizeInBits(Ty) + 1);
      const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
      const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
@@ -5704,7 +6245,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
  /// HowManyLessThans - Return the number of times a backedge containing the
  /// specified less-than comparison will execute.  If not computable, return
  /// CouldNotCompute.
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
  ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                                    const Loop *L, bool isSigned) {
    // Only handle:  "ADDREC < LoopInvariant".
@@ -5715,8 +6256,9 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
      return getCouldNotCompute();
  
    // Check to see if we have a flag which makes analysis easy.
-  bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) :
-                           AddRec->getNoWrapFlags(SCEV::FlagNUW);
+  bool NoWrap = isSigned ?
+    AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) :
+    AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW));
  
    if (AddRec->isAffine()) {
      unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
@@ -5811,7 +6353,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
      if (isa<SCEVCouldNotCompute>(MaxBECount))
        MaxBECount = BECount;
  
-    return BackedgeTakenInfo(BECount, MaxBECount);
+    return ExitLimit(BECount, MaxBECount);
    }
  
    return getCouldNotCompute();
@@ -6007,6 +6549,7 @@ bool ScalarEvolution::runOnFunction(Function &F) {
    this->F = &F;
    LI = &getAnalysis<LoopInfo>();
    TD = getAnalysisIfAvailable<TargetData>();
+  TLI = &getAnalysis<TargetLibraryInfo>();
    DT = &getAnalysis<DominatorTree>();
    return false;
  }
@@ -6019,6 +6562,15 @@ void ScalarEvolution::releaseMemory() {
    FirstUnknown = 0;
  
    ValueExprMap.clear();
+
+  // Free any extra memory created for ExitNotTakenInfo in the unlikely event
+  // that a loop had multiple computable exits.
+  for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
+         BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
+       I != E; ++I) {
+    I->second.clear();
+  }
+
    BackedgeTakenCounts.clear();
    ConstantEvolutionLoopExitValue.clear();
    ValuesAtScopes.clear();
@@ -6034,6 +6586,7 @@ void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesAll();
    AU.addRequiredTransitive<LoopInfo>();
    AU.addRequiredTransitive<DominatorTree>();
+  AU.addRequired<TargetLibraryInfo>();
  }
  
  bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
@@ -6209,11 +6762,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
      return LoopInvariant;
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-    return LoopVariant;
-  default: break;
+  default: llvm_unreachable("Unknown SCEV kind!");
    }
-  llvm_unreachable("Unknown SCEV kind!");
-  return LoopVariant;
  }
  
  bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
@@ -6295,11 +6845,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
      return ProperlyDominatesBlock;
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-    return DoesNotDominateBlock;
-  default: break;
+  default: 
+    llvm_unreachable("Unknown SCEV kind!");
    }
-  llvm_unreachable("Unknown SCEV kind!");
-  return DoesNotDominateBlock;
  }
  
  bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
@@ -6345,11 +6893,9 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
      return false;
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-    return false;
-  default: break;
+  default:
+    llvm_unreachable("Unknown SCEV kind!");
    }
-  llvm_unreachable("Unknown SCEV kind!");
-  return false;
  }
  
  void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {