Make all pointers to TargetRegisterClass const since they are all pointers to static...

[oota-llvm.git] / lib / Transforms / Scalar / IndVarSimplify.cpp
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp

index 489289d8a042c77fd1e8786dcea056c8d1a6cd0b..d1e57e101bf0a34caa4a8ff188bd8d51758564fb 100644 (file)
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -58,18 +58,16 @@ STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
  STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
  STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
  
-namespace llvm {
-  cl::opt<bool> EnableIVRewrite(
-    "enable-iv-rewrite", cl::Hidden,
-    cl::desc("Enable canonical induction variable rewriting"));
-
-  // Trip count verification can be enabled by default under NDEBUG if we
-  // implement a strong expression equivalence checker in SCEV. Until then, we
-  // use the verify-indvars flag, which may assert in some cases.
-  cl::opt<bool> VerifyIndvars(
-    "verify-indvars", cl::Hidden,
-    cl::desc("Verify the ScalarEvolution result after running indvars"));
-}
+static cl::opt<bool> EnableIVRewrite(
+  "enable-iv-rewrite", cl::Hidden,
+  cl::desc("Enable canonical induction variable rewriting"));
+
+// Trip count verification can be enabled by default under NDEBUG if we
+// implement a strong expression equivalence checker in SCEV. Until then, we
+// use the verify-indvars flag, which may assert in some cases.
+static cl::opt<bool> VerifyIndvars(
+  "verify-indvars", cl::Hidden,
+  cl::desc("Verify the ScalarEvolution result after running indvars"));
  
  namespace {
    class IndVarSimplify : public LoopPass {
@@ -119,8 +117,6 @@ namespace {
  
      void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
  
-    void SimplifyCongruentIVs(Loop *L);
-
      void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  
      void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
@@ -182,6 +178,11 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
      // base of a recurrence. This handles the case in which SCEV expansion
      // converts a pointer type recurrence into a nonrecurrent pointer base
      // indexed by an integer recurrence.
+
+    // If the GEP base pointer is a vector of pointers, abort.
+    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
+      return false;
+
      const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
      const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
      if (FromBase == ToBase)
@@ -723,10 +724,11 @@ namespace {
    // extend operations. This information is recorded by CollectExtend and
    // provides the input to WidenIV.
    struct WideIVInfo {
+    PHINode *NarrowIV;
      Type *WidestNativeType; // Widest integer type created [sz]ext
      bool IsSigned;          // Was an sext user seen before a zext?
  
-    WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
+    WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
    };
  
    class WideIVVisitor : public IVVisitor {
@@ -736,8 +738,9 @@ namespace {
    public:
      WideIVInfo WI;
  
-    WideIVVisitor(ScalarEvolution *SCEV, const TargetData *TData) :
-      SE(SCEV), TD(TData) {}
+    WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
+                  const TargetData *TData) :
+      SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
  
      // Implement the interface used by simplifyUsersOfIV.
      virtual void visitCast(CastInst *Cast);
@@ -814,10 +817,10 @@ class WidenIV {
    SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
  
  public:
-  WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo,
+  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
            ScalarEvolution *SEv, DominatorTree *DTree,
            SmallVectorImpl<WeakVH> &DI) :
-    OrigPhi(PN),
+    OrigPhi(WI.NarrowIV),
      WideType(WI.WidestNativeType),
      IsSigned(WI.IsSigned),
      LI(LInfo),
@@ -843,7 +846,7 @@ protected:
  
    const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
  
-  Instruction *WidenIVUse(NarrowIVDefUse DU);
+  Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
  
    void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
  };
@@ -917,41 +920,6 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
      }
      return WideBO;
    }
-  llvm_unreachable(0);
-}
-
-/// HoistStep - Attempt to hoist an IV increment above a potential use.
-///
-/// To successfully hoist, two criteria must be met:
-/// - IncV operands dominate InsertPos and
-/// - InsertPos dominates IncV
-///
-/// Meeting the second condition means that we don't need to check all of IncV's
-/// existing uses (it's moving up in the domtree).
-///
-/// This does not yet recursively hoist the operands, although that would
-/// not be difficult.
-static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
-                      const DominatorTree *DT)
-{
-  if (DT->dominates(IncV, InsertPos))
-    return true;
-
-  if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
-    return false;
-
-  if (IncV->mayHaveSideEffects())
-    return false;
-
-  // Attempt to hoist IncV
-  for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
-       OI != OE; ++OI) {
-    Instruction *OInst = dyn_cast<Instruction>(OI);
-    if (OInst && !DT->dominates(OInst, InsertPos))
-      return false;
-  }
-  IncV->moveBefore(InsertPos);
-  return true;
  }
  
  /// No-wrap operations can transfer sign extension of their result to their
@@ -980,9 +948,13 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
    else
      return 0;
  
+  // When creating this AddExpr, don't apply the current operations NSW or NUW
+  // flags. This instruction may be guarded by control flow that the no-wrap
+  // behavior depends on. Non-control-equivalent instructions can be mapped to
+  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+  // semantics to those operations.
    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
-    SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
-                   IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
+    SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
  
    if (!AddRec || AddRec->getLoop() != L)
      return 0;
@@ -1017,7 +989,7 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
  
  /// WidenIVUse - Determine whether an individual user of the narrow IV can be
  /// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
+Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
  
    // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
    if (isa<PHINode>(DU.NarrowUse) &&
@@ -1084,9 +1056,9 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
    // Reuse the IV increment that SCEVExpander created as long as it dominates
    // NarrowUse.
    Instruction *WideUse = 0;
-  if (WideAddRec == WideIncExpr && HoistStep(WideInc, DU.NarrowUse, DT)) {
+  if (WideAddRec == WideIncExpr
+      && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
      WideUse = WideInc;
-  }
    else {
      WideUse = CloneIVUser(DU);
      if (!WideUse)
@@ -1190,7 +1162,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
  
      // Process a def-use edge. This may replace the use, so don't hold a
      // use_iterator across it.
-    Instruction *WideUse = WidenIVUse(DU);
+    Instruction *WideUse = WidenIVUse(DU, Rewriter);
  
      // Follow all def-use edges from the previous narrow use.
      if (WideUse)
@@ -1217,7 +1189,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
  void IndVarSimplify::SimplifyAndExtend(Loop *L,
                                         SCEVExpander &Rewriter,
                                         LPPassManager &LPM) {
-  std::map<PHINode *, WideIVInfo> WideIVMap;
+  SmallVector<WideIVInfo, 8> WideIVs;
  
    SmallVector<PHINode*, 8> LoopPhis;
    for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
@@ -1238,72 +1210,22 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
        PHINode *CurrIV = LoopPhis.pop_back_val();
  
        // Information about sign/zero extensions of CurrIV.
-      WideIVVisitor WIV(SE, TD);
+      WideIVVisitor WIV(CurrIV, SE, TD);
  
        Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
  
        if (WIV.WI.WidestNativeType) {
-        WideIVMap[CurrIV] = WIV.WI;
+        WideIVs.push_back(WIV.WI);
        }
      } while(!LoopPhis.empty());
  
-    for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
-           E = WideIVMap.end(); I != E; ++I) {
-      WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
+    for (; !WideIVs.empty(); WideIVs.pop_back()) {
+      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
        if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
          Changed = true;
          LoopPhis.push_back(WidePhi);
        }
      }
-    WideIVMap.clear();
-  }
-}
-
-/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
-/// replace them with their chosen representative.
-///
-void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
-  DenseMap<const SCEV *, PHINode *> ExprToIVMap;
-  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
-    PHINode *Phi = cast<PHINode>(I);
-    if (!SE->isSCEVable(Phi->getType()))
-      continue;
-
-    const SCEV *S = SE->getSCEV(Phi);
-    std::pair<DenseMap<const SCEV *, PHINode *>::const_iterator, bool> Tmp =
-      ExprToIVMap.insert(std::make_pair(S, Phi));
-    if (Tmp.second)
-      continue;
-    PHINode *OrigPhi = Tmp.first->second;
-
-    // If one phi derives from the other via GEPs, types may differ.
-    if (OrigPhi->getType() != Phi->getType())
-      continue;
-
-    // Replacing the congruent phi is sufficient because acyclic redundancy
-    // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
-    // that a phi is congruent, it's almost certain to be the head of an IV
-    // user cycle that is isomorphic with the original phi. So it's worth
-    // eagerly cleaning up the common case of a single IV increment.
-    if (BasicBlock *LatchBlock = L->getLoopLatch()) {
-      Instruction *OrigInc =
-        cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
-      Instruction *IsomorphicInc =
-        cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
-      if (OrigInc != IsomorphicInc &&
-          OrigInc->getType() == IsomorphicInc->getType() &&
-          SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
-          HoistStep(OrigInc, IsomorphicInc, DT)) {
-        DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
-              << *IsomorphicInc << '\n');
-        IsomorphicInc->replaceAllUsesWith(OrigInc);
-        DeadInsts.push_back(IsomorphicInc);
-      }
-    }
-    DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
-    ++NumElimIV;
-    Phi->replaceAllUsesWith(OrigPhi);
-    DeadInsts.push_back(Phi);
    }
  }
  
@@ -1315,7 +1237,11 @@ void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
  /// BackedgeTakenInfo. If these expressions have not been reduced, then
  /// expanding them may incur additional cost (albeit in the loop preheader).
  static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
+                                SmallPtrSet<const SCEV*, 8> &Processed,
                                  ScalarEvolution *SE) {
+  if (!Processed.insert(S))
+    return false;
+
    // If the backedge-taken count is a UDiv, it's very likely a UDiv that
    // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
    // precise expression, rather than a UDiv from the user's code. If we can't
@@ -1343,7 +1269,7 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
    if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
      for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
           I != E; ++I) {
-      if (isHighCostExpansion(*I, BI, SE))
+      if (isHighCostExpansion(*I, BI, Processed, SE))
          return true;
      }
      return false;
@@ -1354,14 +1280,24 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
    if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
      return true;
  
-  // If we haven't recognized an expensive SCEV patter, assume its an expression
-  // produced by program code.
+  // If we haven't recognized an expensive SCEV pattern, assume it's an
+  // expression produced by program code.
    return false;
  }
  
  /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
  /// count expression can be safely and cheaply expanded into an instruction
  /// sequence that can be used by LinearFunctionTestReplace.
+///
+/// TODO: This fails for pointer-type loop counters with greater than one byte
+/// strides, consequently preventing LFTR from running. For the purpose of LFTR
+/// we could skip this check in the case that the LFTR loop counter (chosen by
+/// FindLoopCounter) is also pointer type. Instead, we could directly convert
+/// the loop test to an inequality test by checking the target data's alignment
+/// of element types (given that the initial pointer value originates from or is
+/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
+/// However, we don't yet have a strong motivation for converting loop tests
+/// into inequality tests.
  static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
    const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
    if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
@@ -1376,7 +1312,8 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
    if (!BI)
      return false;
  
-  if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
+  SmallPtrSet<const SCEV*, 8> Processed;
+  if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
      return false;
  
    return true;
@@ -1513,6 +1450,10 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  
  /// FindLoopCounter - Find an affine IV in canonical form.
  ///
+/// BECount may be an i8* pointer type. The pointer difference is already
+/// valid count without scaling the address stride, so it remains a pointer
+/// expression as far as SCEV is concerned.
+///
  /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
  ///
  /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
@@ -1521,11 +1462,6 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  static PHINode *
  FindLoopCounter(Loop *L, const SCEV *BECount,
                  ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
-  // I'm not sure how BECount could be a pointer type, but we definitely don't
-  // want to LFTR that.
-  if (BECount->getType()->isPointerTy())
-    return 0;
-
    uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
  
    Value *Cond =
@@ -1542,6 +1478,10 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
      if (!SE->isSCEVable(Phi->getType()))
        continue;
  
+    // Avoid comparing an integer IV against a pointer Limit.
+    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
+      continue;
+
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
      if (!AR || AR->getLoop() != L || !AR->isAffine())
        continue;
@@ -1587,6 +1527,82 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
    return BestPhi;
  }
  
+/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
+/// holds the RHS of the new loop test.
+static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
+                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+  const SCEV *IVInit = AR->getStart();
+
+  // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
+  // finds a valid pointer IV. Sign extend BECount in order to materialize a
+  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
+  // the existing GEPs whenever possible.
+  if (IndVar->getType()->isPointerTy()
+      && !IVCount->getType()->isPointerTy()) {
+
+    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
+    const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy);
+
+    // Expand the code for the iteration count.
+    assert(SE->isLoopInvariant(IVOffset, L) &&
+           "Computed iteration count is not loop invariant!");
+    BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+    Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
+
+    Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
+    assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
+    // We could handle pointer IVs other than i8*, but we need to compensate for
+    // gep index scaling. See canExpandBackedgeTakenCount comments.
+    assert(SE->getSizeOfExpr(
+             cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
+           && "unit stride pointer IV must be i8*");
+
+    IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+    return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
+  }
+  else {
+    // In any other case, convert both IVInit and IVCount to integers before
+    // comparing. This may result in SCEV expension of pointers, but in practice
+    // SCEV will fold the pointer arithmetic away as such:
+    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
+    //
+    // Valid Cases: (1) both integers is most common; (2) both may be pointers
+    // for simple memset-style loops; (3) IVInit is an integer and IVCount is a
+    // pointer may occur when enable-iv-rewrite generates a canonical IV on top
+    // of case #2.
+
+    const SCEV *IVLimit = 0;
+    // For unit stride, IVCount = Start + BECount with 2's complement overflow.
+    // For non-zero Start, compute IVCount here.
+    if (AR->getStart()->isZero())
+      IVLimit = IVCount;
+    else {
+      assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+      const SCEV *IVInit = AR->getStart();
+
+      // For integer IVs, truncate the IV before computing IVInit + BECount.
+      if (SE->getTypeSizeInBits(IVInit->getType())
+          > SE->getTypeSizeInBits(IVCount->getType()))
+        IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
+
+      IVLimit = SE->getAddExpr(IVInit, IVCount);
+    }
+    // Expand the code for the iteration count.
+    BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+    IRBuilder<> Builder(BI);
+    assert(SE->isLoopInvariant(IVLimit, L) &&
+           "Computed iteration count is not loop invariant!");
+    // Ensure that we generate the same type as IndVar, or a smaller integer
+    // type. In the presence of null pointer values, we have an integer type
+    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
+    Type *LimitTy = IVCount->getType()->isPointerTy() ?
+      IndVar->getType() : IVCount->getType();
+    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
+  }
+}
+
  /// LinearFunctionTestReplace - This method rewrites the exit condition of the
  /// loop to be a canonical != comparison against the incremented loop induction
  /// variable.  This pass is able to rewrite the exit tests of any loop where the
@@ -1598,37 +1614,36 @@ LinearFunctionTestReplace(Loop *L,
                            PHINode *IndVar,
                            SCEVExpander &Rewriter) {
    assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
-  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
  
    // LFTR can ignore IV overflow and truncate to the width of
    // BECount. This avoids materializing the add(zext(add)) expression.
    Type *CntTy = !EnableIVRewrite ?
      BackedgeTakenCount->getType() : IndVar->getType();
  
-  const SCEV *IVLimit = BackedgeTakenCount;
+  const SCEV *IVCount = BackedgeTakenCount;
  
-  // If the exiting block is not the same as the backedge block, we must compare
-  // against the preincremented value, otherwise we prefer to compare against
-  // the post-incremented value.
+  // If the exiting block is the same as the backedge block, we prefer to
+  // compare against the post-incremented value, otherwise we must compare
+  // against the preincremented value.
    Value *CmpIndVar;
    if (L->getExitingBlock() == L->getLoopLatch()) {
      // Add one to the "backedge-taken" count to get the trip count.
      // If this addition may overflow, we have to be more pessimistic and
      // cast the induction variable before doing the add.
      const SCEV *N =
-      SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
-    if (CntTy == IVLimit->getType())
-      IVLimit = N;
+      SE->getAddExpr(IVCount, SE->getConstant(IVCount->getType(), 1));
+    if (CntTy == IVCount->getType())
+      IVCount = N;
      else {
-      const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
+      const SCEV *Zero = SE->getConstant(IVCount->getType(), 0);
        if ((isa<SCEVConstant>(N) && !N->isZero()) ||
            SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
          // No overflow. Cast the sum.
-        IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
+        IVCount = SE->getTruncateOrZeroExtend(N, CntTy);
        } else {
          // Potential overflow. Cast before doing the add.
-        IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
-        IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
+        IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
+        IVCount = SE->getAddExpr(IVCount, SE->getConstant(CntTy, 1));
        }
      }
      // The BackedgeTaken expression contains the number of times that the
@@ -1636,62 +1651,17 @@ LinearFunctionTestReplace(Loop *L,
      // number of times the loop executes, so use the incremented indvar.
      CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
    } else {
-    // We have to use the preincremented value...
-    IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
+    // We must use the preincremented value...
+    IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
      CmpIndVar = IndVar;
    }
  
-  // For unit stride, IVLimit = Start + BECount with 2's complement overflow.
-  // So for, non-zero start compute the IVLimit here.
-  bool isPtrIV = false;
-  Type *CmpTy = CntTy;
-  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
-  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
-  if (!AR->getStart()->isZero()) {
-    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
-    const SCEV *IVInit = AR->getStart();
-
-    // For pointer types, sign extend BECount in order to materialize a GEP.
-    // Note that for without EnableIVRewrite, we never run SCEVExpander on a
-    // pointer type, because we must preserve the existing GEPs. Instead we
-    // directly generate a GEP later.
-    if (IVInit->getType()->isPointerTy()) {
-      isPtrIV = true;
-      CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
-      IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
-    }
-    // For integer types, truncate the IV before computing IVInit + BECount.
-    else {
-      if (SE->getTypeSizeInBits(IVInit->getType())
-          > SE->getTypeSizeInBits(CmpTy))
-        IVInit = SE->getTruncateExpr(IVInit, CmpTy);
-
-      IVLimit = SE->getAddExpr(IVInit, IVLimit);
-    }
-  }
-  // Expand the code for the iteration count.
-  IRBuilder<> Builder(BI);
-
-  assert(SE->isLoopInvariant(IVLimit, L) &&
-         "Computed iteration count is not loop invariant!");
-  Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
-
-  // Create a gep for IVInit + IVLimit from on an existing pointer base.
-  assert(isPtrIV == IndVar->getType()->isPointerTy() &&
-         "IndVar type must match IVInit type");
-  if (isPtrIV) {
-      Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
-      assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
-      assert(SE->getSizeOfExpr(
-               cast<PointerType>(IVStart->getType())->getElementType())->isOne()
-             && "unit stride pointer IV must be i8*");
-
-      Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
-      ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
-      Builder.SetInsertPoint(BI);
-  }
+  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
+  assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
+         && "genLoopLimit missed a cast");
  
    // Insert a new icmp_ne or icmp_eq instruction before the branch.
+  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
    ICmpInst::Predicate P;
    if (L->contains(BI->getSuccessor(0)))
      P = ICmpInst::ICMP_NE;
@@ -1703,11 +1673,13 @@ LinearFunctionTestReplace(Loop *L,
                 << "       op:\t"
                 << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
                 << "      RHS:\t" << *ExitCnt << "\n"
-               << "     Expr:\t" << *IVLimit << "\n");
+               << "  IVCount:\t" << *IVCount << "\n");
  
+  IRBuilder<> Builder(BI);
    if (SE->getTypeSizeInBits(CmpIndVar->getType())
-      > SE->getTypeSizeInBits(CmpTy)) {
-    CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
+      > SE->getTypeSizeInBits(ExitCnt->getType())) {
+    CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+                                    "lftr.wideiv");
    }
  
    Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
@@ -1764,11 +1736,12 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
      if (isa<LandingPadInst>(I))
        continue;
  
-    // Don't sink static AllocaInsts out of the entry block, which would
-    // turn them into dynamic allocas!
-    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
-      if (AI->isStaticAlloca())
-        continue;
+    // Don't sink alloca: we never want to sink static alloca's out of the
+    // entry block, and correctly sinking dynamic alloca's requires
+    // checks for stacksave/stackrestore intrinsics.
+    // FIXME: Refactor this check somehow?
+    if (isa<AllocaInst>(I))
+      continue;
  
      // Determine if there is a use in or before the loop (direct or
      // otherwise).
@@ -1848,6 +1821,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
  
    // Create a rewriter object which we'll use to transform the code with.
    SCEVExpander Rewriter(*SE, "indvars");
+#ifndef NDEBUG
+  Rewriter.setDebugType(DEBUG_TYPE);
+#endif
  
    // Eliminate redundant IV users.
    //
@@ -1875,7 +1851,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
  
    // Eliminate redundant IV cycles.
    if (!EnableIVRewrite)
-    SimplifyCongruentIVs(L);
+    NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
  
    // Compute the type of the largest recurrence expression, and decide whether
    // a canonical induction variable should be inserted.