refactor a blob of code out to a new 'FoldOrOfFCmps' function and

[oota-llvm.git] / lib / Transforms / Scalar / IndVarSimplify.cpp
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp

index 56bb4fea92c2c37306d6aa43a61641b451400a5e..66a786287f526fc2f5ab214aaf9f5babb6a3b353 100644 (file)
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -17,7 +17,10 @@
  //      which starts at zero and steps by one.
  //   2. The canonical induction variable is guaranteed to be the first PHI node
  //      in the loop header block.
-//   3. Any pointer arithmetic recurrences are raised to use array subscripts.
+//   3. The canonical induction variable is guaranteed to be in a wide enough
+//      type so that IV expressions need not be (directly) zero-extended or
+//      sign-extended.
+//   4. Any pointer arithmetic recurrences are raised to use array subscripts.
  //
  // If the trip count of a loop is computable, this pass also makes the following
  // changes:
@@ -31,9 +34,7 @@
  //      expression, this transformation will make the loop dead.
  //
  // This transformation should be followed by strength reduction after all of the
-// desired loop transformations have been performed.  Additionally, on targets
-// where it is profitable, the loop could be transformed to count down to zero
-// (the "do loop" optimization).
+// desired loop transformations have been performed.
  //
  //===----------------------------------------------------------------------===//
  
@@ -42,6 +43,7 @@
  #include "llvm/BasicBlock.h"
  #include "llvm/Constants.h"
  #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
  #include "llvm/Type.h"
  #include "llvm/Analysis/Dominators.h"
  #include "llvm/Analysis/IVUsers.h"
@@ -51,12 +53,10 @@
  #include "llvm/Support/CFG.h"
  #include "llvm/Support/Compiler.h"
  #include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
  #include "llvm/Transforms/Utils/Local.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SetVector.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/ADT/STLExtras.h"
  using namespace llvm;
@@ -71,45 +71,45 @@ namespace {
      IVUsers         *IU;
      LoopInfo        *LI;
      ScalarEvolution *SE;
+    DominatorTree   *DT;
      bool Changed;
    public:
  
-   static char ID; // Pass identification, replacement for typeid
-   IndVarSimplify() : LoopPass(&ID) {}
-
-   virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
-
-   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-     AU.addRequired<DominatorTree>();
-     AU.addRequired<ScalarEvolution>();
-     AU.addRequiredID(LCSSAID);
-     AU.addRequiredID(LoopSimplifyID);
-     AU.addRequired<LoopInfo>();
-     AU.addRequired<IVUsers>();
-     AU.addPreserved<ScalarEvolution>();
-     AU.addPreservedID(LoopSimplifyID);
-     AU.addPreserved<IVUsers>();
-     AU.addPreservedID(LCSSAID);
-     AU.setPreservesCFG();
-   }
+    static char ID; // Pass identification, replacement for typeid
+    IndVarSimplify() : LoopPass(&ID) {}
+
+    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<LoopInfo>();
+      AU.addRequired<ScalarEvolution>();
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addRequiredID(LCSSAID);
+      AU.addRequired<IVUsers>();
+      AU.addPreserved<ScalarEvolution>();
+      AU.addPreservedID(LoopSimplifyID);
+      AU.addPreservedID(LCSSAID);
+      AU.addPreserved<IVUsers>();
+      AU.setPreservesCFG();
+    }
  
    private:
  
      void RewriteNonIntegerIVs(Loop *L);
  
-    ICmpInst *LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount,
+    ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                     Value *IndVar,
                                     BasicBlock *ExitingBlock,
                                     BranchInst *BI,
                                     SCEVExpander &Rewriter);
-    void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount);
+    void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount,
+                               SCEVExpander &Rewriter);
  
      void RewriteIVExpressions(Loop *L, const Type *LargestType,
                                SCEVExpander &Rewriter);
  
-    void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter);
-
-    void FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter);
+    void SinkUnusedInvariants(Loop *L);
  
      void HandleFloatingPointIV(Loop *L, PHINode *PH);
    };
@@ -129,7 +129,7 @@ Pass *llvm::createIndVarSimplifyPass() {
  /// SCEV analysis can determine a loop-invariant trip count of the loop, which
  /// is actually a much broader range than just linear tests.
  ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
-                                   SCEVHandle BackedgeTakenCount,
+                                   const SCEV *BackedgeTakenCount,
                                     Value *IndVar,
                                     BasicBlock *ExitingBlock,
                                     BranchInst *BI,
@@ -138,13 +138,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
    // against the preincremented value, otherwise we prefer to compare against
    // the post-incremented value.
    Value *CmpIndVar;
-  SCEVHandle RHS = BackedgeTakenCount;
+  const SCEV *RHS = BackedgeTakenCount;
    if (ExitingBlock == L->getLoopLatch()) {
      // Add one to the "backedge-taken" count to get the trip count.
      // If this addition may overflow, we have to be more pessimistic and
      // cast the induction variable before doing the add.
-    SCEVHandle Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType());
-    SCEVHandle N =
+    const SCEV *Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType());
+    const SCEV *N =
        SE->getAddExpr(BackedgeTakenCount,
                       SE->getIntegerSCEV(1, BackedgeTakenCount->getType()));
      if ((isa<SCEVConstant>(N) && !N->isZero()) ||
@@ -170,10 +170,10 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
      CmpIndVar = IndVar;
    }
  
-  // Expand the code for the iteration count into the preheader of the loop.
-  BasicBlock *Preheader = L->getLoopPreheader();
-  Value *ExitCnt = Rewriter.expandCodeFor(RHS, CmpIndVar->getType(),
-                                          Preheader->getTerminator());
+  // Expand the code for the iteration count.
+  assert(RHS->isLoopInvariant(L) &&
+         "Computed iteration count is not loop invariant!");
+  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
  
    // Insert a new icmp_ne or icmp_eq instruction before the branch.
    ICmpInst::Predicate Opcode;
@@ -188,10 +188,15 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
         << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
         << "      RHS:\t" << *RHS << "\n";
  
-  ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI);
+  ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
  
    Instruction *OrigCond = cast<Instruction>(BI->getCondition());
-  OrigCond->replaceAllUsesWith(Cond);
+  // It's tempting to use replaceAllUsesWith here to fully replace the old
+  // comparison, but that's not immediately safe, since users of the old
+  // comparison may not be dominated by the new comparison. Instead, just
+  // update the branch to use the new comparison; in the common case this
+  // will make the old comparison dead.
+  BI->setCondition(Cond);
    RecursivelyDeleteTriviallyDeadInstructions(OrigCond);
  
    ++NumLFTR;
@@ -210,28 +215,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
  /// able to brute-force evaluate arbitrary instructions as long as they have
  /// constant operands at the beginning of the loop.
  void IndVarSimplify::RewriteLoopExitValues(Loop *L,
-                                           const SCEV *BackedgeTakenCount) {
+                                           const SCEV *BackedgeTakenCount,
+                                           SCEVExpander &Rewriter) {
    // Verify the input to the pass in already in LCSSA form.
    assert(L->isLCSSAForm());
  
-  BasicBlock *Preheader = L->getLoopPreheader();
-
-  // Scan all of the instructions in the loop, looking at those that have
-  // extra-loop users and which are recurrences.
-  SCEVExpander Rewriter(*SE);
-
-  // We insert the code into the preheader of the loop if the loop contains
-  // multiple exit blocks, or in the exit block if there is exactly one.
-  BasicBlock *BlockToInsertInto;
    SmallVector<BasicBlock*, 8> ExitBlocks;
    L->getUniqueExitBlocks(ExitBlocks);
-  if (ExitBlocks.size() == 1)
-    BlockToInsertInto = ExitBlocks[0];
-  else
-    BlockToInsertInto = Preheader;
-  BasicBlock::iterator InsertPt = BlockToInsertInto->getFirstNonPHI();
-
-  std::map<Instruction*, Value*> ExitValues;
  
    // Find all values that are computed inside the loop, but used outside of it.
    // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
@@ -249,7 +239,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
      // Iterate over all of the PHI nodes.
      BasicBlock::iterator BBI = ExitBB->begin();
      while ((PN = dyn_cast<PHINode>(BBI++))) {
-
+      if (PN->use_empty())
+        continue; // dead use, don't replace it
        // Iterate over all of the values in all the PHI nodes.
        for (unsigned i = 0; i != NumPreds; ++i) {
          // If the value being merged in is not integer or is not defined
@@ -273,20 +264,14 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
          // Okay, this instruction has a user outside of the current loop
          // and varies predictably *inside* the loop.  Evaluate the value it
          // contains when the loop exits, if possible.
-        SCEVHandle SH = SE->getSCEV(Inst);
-        SCEVHandle ExitValue = SE->getSCEVAtScope(SH, L->getParentLoop());
-        if (isa<SCEVCouldNotCompute>(ExitValue) ||
-            !ExitValue->isLoopInvariant(L))
+        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+        if (!ExitValue->isLoopInvariant(L))
            continue;
  
          Changed = true;
          ++NumReplaced;
  
-        // See if we already computed the exit value for the instruction, if so,
-        // just reuse it.
-        Value *&ExitVal = ExitValues[Inst];
-        if (!ExitVal)
-          ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), InsertPt);
+        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
  
          DOUT << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
               << "  LoopVal = " << *Inst << "\n";
@@ -296,16 +281,23 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
          // If this instruction is dead now, delete it.
          RecursivelyDeleteTriviallyDeadInstructions(Inst);
  
-        // See if this is a single-entry LCSSA PHI node.  If so, we can (and
-        // have to) remove
-        // the PHI entirely.  This is safe, because the NewVal won't be variant
-        // in the loop, so we don't need an LCSSA phi node anymore.
          if (NumPreds == 1) {
+          // Completely replace a single-pred PHI. This is safe, because the
+          // NewVal won't be variant in the loop, so we don't need an LCSSA phi
+          // node anymore.
            PN->replaceAllUsesWith(ExitVal);
            RecursivelyDeleteTriviallyDeadInstructions(PN);
-          break;
          }
        }
+      if (NumPreds != 1) {
+        // Clone the PHI and delete the original one. This lets IVUsers and
+        // any other maps purge the original user from their records.
+        PHINode *NewPN = PN->clone(PN->getContext());
+        NewPN->takeName(PN);
+        NewPN->insertBefore(PN);
+        PN->replaceAllUsesWith(NewPN);
+        PN->eraseFromParent();
+      }
      }
    }
  }
@@ -337,15 +329,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
    IU = &getAnalysis<IVUsers>();
    LI = &getAnalysis<LoopInfo>();
    SE = &getAnalysis<ScalarEvolution>();
+  DT = &getAnalysis<DominatorTree>();
    Changed = false;
  
    // If there are any floating-point recurrences, attempt to
    // transform them to use integer recurrences.
    RewriteNonIntegerIVs(L);
  
-  BasicBlock *Header       = L->getHeader();
    BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null
-  SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+
+  // Create a rewriter object which we'll use to transform the code with.
+  SCEVExpander Rewriter(*SE);
  
    // Check to see if this loop has a computable loop-invariant execution count.
    // If so, this means that we can compute the final value of any expressions
@@ -354,7 +349,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
    // the current expressions.
    //
    if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
-    RewriteLoopExitValues(L, BackedgeTakenCount);
+    RewriteLoopExitValues(L, BackedgeTakenCount, Rewriter);
  
    // Compute the type of the largest recurrence expression, and decide whether
    // a canonical induction variable should be inserted.
@@ -370,14 +365,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
        NeedCannIV = true;
    }
    for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
-    SCEVHandle Stride = IU->StrideOrder[i];
+    const SCEV *Stride = IU->StrideOrder[i];
      const Type *Ty = SE->getEffectiveSCEVType(Stride->getType());
      if (!LargestType ||
          SE->getTypeSizeInBits(Ty) >
            SE->getTypeSizeInBits(LargestType))
        LargestType = Ty;
  
-    std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
        IU->IVUsesByStride.find(IU->StrideOrder[i]);
      assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
  
@@ -385,17 +380,35 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
        NeedCannIV = true;
    }
  
-  // Create a rewriter object which we'll use to transform the code with.
-  SCEVExpander Rewriter(*SE);
-
    // Now that we know the largest of of the induction variable expressions
    // in this loop, insert a canonical induction variable of the largest size.
    Value *IndVar = 0;
    if (NeedCannIV) {
-    IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
+    // Check to see if the loop already has a canonical-looking induction
+    // variable. If one is present and it's wider than the planned canonical
+    // induction variable, temporarily remove it, so that the Rewriter
+    // doesn't attempt to reuse it.
+    PHINode *OldCannIV = L->getCanonicalInductionVariable();
+    if (OldCannIV) {
+      if (SE->getTypeSizeInBits(OldCannIV->getType()) >
+          SE->getTypeSizeInBits(LargestType))
+        OldCannIV->removeFromParent();
+      else
+        OldCannIV = 0;
+    }
+
+    IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
+
      ++NumInserted;
      Changed = true;
      DOUT << "INDVARS: New CanIV: " << *IndVar;
+
+    // Now that the official induction variable is established, reinsert
+    // the old canonical-looking variable after it so that the IR remains
+    // consistent. It will be deleted as part of the dead-PHI deletion at
+    // the end of the pass.
+    if (OldCannIV)
+      OldCannIV->insertAfter(cast<Instruction>(IndVar));
    }
  
    // If we have a trip count expression, rewrite the loop's exit condition
@@ -410,17 +423,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
                                            ExitingBlock, BI, Rewriter);
    }
  
-  Rewriter.setInsertionPoint(Header->getFirstNonPHI());
-
-  // Rewrite IV-derived expressions.
+  // Rewrite IV-derived expressions. Clears the rewriter cache.
    RewriteIVExpressions(L, LargestType, Rewriter);
  
+  // The Rewriter must not be used from this point on.
+
    // Loop-invariant instructions in the preheader that aren't used in the
    // loop may be sunk below the loop to reduce register pressure.
-  SinkUnusedInvariants(L, Rewriter);
-
-  // Reorder instructions to avoid use-before-def conditions.
-  FixUsesBeforeDefs(L, Rewriter);
+  SinkUnusedInvariants(L);
  
    // For completeness, inform IVUsers of the IV use in the newly-created
    // loop exit test instruction.
@@ -446,21 +456,20 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
    // the need for the code evaluation methods to insert induction variables
    // of different sizes.
    for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
-    SCEVHandle Stride = IU->StrideOrder[i];
+    const SCEV *Stride = IU->StrideOrder[i];
  
-    std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
        IU->IVUsesByStride.find(IU->StrideOrder[i]);
      assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
      ilist<IVStrideUse> &List = SI->second->Users;
      for (ilist<IVStrideUse>::iterator UI = List.begin(),
           E = List.end(); UI != E; ++UI) {
-      SCEVHandle Offset = UI->getOffset();
        Value *Op = UI->getOperandValToReplace();
+      const Type *UseTy = Op->getType();
        Instruction *User = UI->getUser();
-      bool isSigned = UI->isSigned();
  
        // Compute the final addrec to expand into code.
-      SCEVHandle AR = IU->getReplacementExpr(*UI);
+      const SCEV *AR = IU->getReplacementExpr(*UI);
  
        // FIXME: It is an extremely bad idea to indvar substitute anything more
        // complex than affine induction variables.  Doing so will put expensive
@@ -468,93 +477,29 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
        // currently can only reduce affine polynomials.  For now just disable
        // indvar subst on anything more complex than an affine addrec, unless
        // it can be expanded to a trivial value.
-      if (!Stride->isLoopInvariant(L) &&
-          !isa<SCEVConstant>(AR) &&
-          L->contains(User->getParent()))
+      if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
          continue;
  
-      Value *NewVal = 0;
-      if (AR->isLoopInvariant(L)) {
-        BasicBlock::iterator I = Rewriter.getInsertionPoint();
-        // Expand loop-invariant values in the loop preheader. They will
-        // be sunk to the exit block later, if possible.
-        NewVal =
-          Rewriter.expandCodeFor(AR, LargestType,
-                                 L->getLoopPreheader()->getTerminator());
-        Rewriter.setInsertionPoint(I);
-        ++NumReplaced;
-      } else {
-        const Type *IVTy = Offset->getType();
-        const Type *UseTy = Op->getType();
-
-        // Promote the Offset and Stride up to the canonical induction
-        // variable's bit width.
-        SCEVHandle PromotedOffset = Offset;
-        SCEVHandle PromotedStride = Stride;
-        if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) {
-          // It doesn't matter for correctness whether zero or sign extension
-          // is used here, since the value is truncated away below, but if the
-          // value is signed, sign extension is more likely to be folded.
-          if (isSigned) {
-            PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType);
-            PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType);
-          } else {
-            PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType);
-            // If the stride is obviously negative, use sign extension to
-            // produce things like x-1 instead of x+255.
-            if (isa<SCEVConstant>(PromotedStride) &&
-                cast<SCEVConstant>(PromotedStride)
-                  ->getValue()->getValue().isNegative())
-              PromotedStride = SE->getSignExtendExpr(PromotedStride,
-                                                     LargestType);
+      // Determine the insertion point for this user. By default, insert
+      // immediately before the user. The SCEVExpander class will automatically
+      // hoist loop invariants out of the loop. For PHI nodes, there may be
+      // multiple uses, so compute the nearest common dominator for the
+      // incoming blocks.
+      Instruction *InsertPt = User;
+      if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+        for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+          if (PHI->getIncomingValue(i) == Op) {
+            if (InsertPt == User)
+              InsertPt = PHI->getIncomingBlock(i)->getTerminator();
              else
-              PromotedStride = SE->getZeroExtendExpr(PromotedStride,
-                                                     LargestType);
+              InsertPt =
+                DT->findNearestCommonDominator(InsertPt->getParent(),
+                                               PHI->getIncomingBlock(i))
+                      ->getTerminator();
            }
-        }
-
-        // Create the SCEV representing the offset from the canonical
-        // induction variable, still in the canonical induction variable's
-        // type, so that all expanded arithmetic is done in the same type.
-        SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType),
-                                           PromotedStride, L);
-        // Add the PromotedOffset as a separate step, because it may not be
-        // loop-invariant.
-        NewAR = SE->getAddExpr(NewAR, PromotedOffset);
-
-        // Expand the addrec into instructions.
-        Value *V = Rewriter.expandCodeFor(NewAR);
-
-        // Insert an explicit cast if necessary to truncate the value
-        // down to the original stride type. This is done outside of
-        // SCEVExpander because in SCEV expressions, a truncate of an
-        // addrec is always folded.
-        if (LargestType != IVTy) {
-          if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType))
-            NewAR = SE->getTruncateExpr(NewAR, IVTy);
-          if (Rewriter.isInsertedExpression(NewAR))
-            V = Rewriter.expandCodeFor(NewAR);
-          else {
-            V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false,
-                                                                IVTy, false),
-                                        V, IVTy);
-            assert(!isa<SExtInst>(V) && !isa<ZExtInst>(V) &&
-                   "LargestType wasn't actually the largest type!");
-            // Force the rewriter to use this trunc whenever this addrec
-            // appears so that it doesn't insert new phi nodes or
-            // arithmetic in a different type.
-            Rewriter.addInsertedValue(V, NewAR);
-          }
-        }
  
-        DOUT << "INDVARS: Made offset-and-trunc IV for offset "
-             << *IVTy << " " << *Offset << ": ";
-        DEBUG(WriteAsOperand(*DOUT, V, false));
-        DOUT << "\n";
-
-        // Now expand it into actual Instructions and patch it into place.
-        NewVal = Rewriter.expandCodeFor(AR, UseTy);
-      }
+      // Now expand it into actual Instructions and patch it into place.
+      Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
  
        // Patch the new value into place.
        if (Op->hasName())
@@ -571,6 +516,10 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
      }
    }
  
+  // Clear the rewriter cache, because values that are in the rewriter's cache
+  // can be deleted in the loop below, causing the AssertingVH in the cache to
+  // trigger.
+  Rewriter.clear();
    // Now that we're done iterating through lists, clean up any instructions
    // which are now dead.
    while (!DeadInsts.empty()) {
@@ -583,19 +532,26 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
  /// If there's a single exit block, sink any loop-invariant values that
  /// were defined in the preheader but not used inside the loop into the
  /// exit block to reduce register pressure in the loop.
-void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) {
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
    BasicBlock *ExitBlock = L->getExitBlock();
    if (!ExitBlock) return;
  
-  Instruction *NonPHI = ExitBlock->getFirstNonPHI();
+  Instruction *InsertPt = ExitBlock->getFirstNonPHI();
    BasicBlock *Preheader = L->getLoopPreheader();
    BasicBlock::iterator I = Preheader->getTerminator();
    while (I != Preheader->begin()) {
      --I;
-    // New instructions were inserted at the end of the preheader. Only
-    // consider those new instructions.
-    if (!Rewriter.isInsertedInstruction(I))
+    // New instructions were inserted at the end of the preheader.
+    if (isa<PHINode>(I))
        break;
+    // Don't move instructions which might have side effects, since the side
+    // effects need to complete before instructions inside the loop.  Also
+    // don't move instructions which might read memory, since the loop may
+    // modify memory. Note that it's okay if the instruction might have
+    // undefined behavior: LoopSimplify guarantees that the preheader
+    // dominates the exit block.
+    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+      continue;
      // Determine if there is a use in or before the loop (direct or
      // otherwise).
      bool UsedInLoop = false;
@@ -622,71 +578,13 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) {
        --I;
      else
        Done = true;
-    ToMove->moveBefore(NonPHI);
+    ToMove->moveBefore(InsertPt);
      if (Done)
        break;
+    InsertPt = ToMove;
    }
  }
  
-/// Re-schedule the inserted instructions to put defs before uses. This
-/// fixes problems that arrise when SCEV expressions contain loop-variant
-/// values unrelated to the induction variable which are defined inside the
-/// loop. FIXME: It would be better to insert instructions in the right
-/// place so that this step isn't needed.
-void IndVarSimplify::FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter) {
-  // Visit all the blocks in the loop in pre-order dom-tree dfs order.
-  DominatorTree *DT = &getAnalysis<DominatorTree>();
-  std::map<Instruction *, unsigned> NumPredsLeft;
-  SmallVector<DomTreeNode *, 16> Worklist;
-  Worklist.push_back(DT->getNode(L->getHeader()));
-  do {
-    DomTreeNode *Node = Worklist.pop_back_val();
-    for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I)
-      if (L->contains((*I)->getBlock()))
-        Worklist.push_back(*I);
-    BasicBlock *BB = Node->getBlock();
-    // Visit all the instructions in the block top down.
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-      // Count the number of operands that aren't properly dominating.
-      unsigned NumPreds = 0;
-      if (Rewriter.isInsertedInstruction(I) && !isa<PHINode>(I))
-        for (User::op_iterator OI = I->op_begin(), OE = I->op_end();
-             OI != OE; ++OI)
-          if (Instruction *Inst = dyn_cast<Instruction>(OI))
-            if (L->contains(Inst->getParent()) && !NumPredsLeft.count(Inst))
-              ++NumPreds;
-      NumPredsLeft[I] = NumPreds;
-      // Notify uses of the position of this instruction, and move the
-      // users (and their dependents, recursively) into place after this
-      // instruction if it is their last outstanding operand.
-      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-           UI != UE; ++UI) {
-        Instruction *Inst = cast<Instruction>(UI);
-        std::map<Instruction *, unsigned>::iterator Z = NumPredsLeft.find(Inst);
-        if (Z != NumPredsLeft.end() && Z->second != 0 && --Z->second == 0) {
-          SmallVector<Instruction *, 4> UseWorkList;
-          UseWorkList.push_back(Inst);
-          BasicBlock::iterator InsertPt = next(I);
-          while (isa<PHINode>(InsertPt)) ++InsertPt;
-          do {
-            Instruction *Use = UseWorkList.pop_back_val();
-            Use->moveBefore(InsertPt);
-            NumPredsLeft.erase(Use);
-            for (Value::use_iterator IUI = Use->use_begin(),
-                 IUE = Use->use_end(); IUI != IUE; ++IUI) {
-              Instruction *IUIInst = cast<Instruction>(IUI);
-              if (L->contains(IUIInst->getParent()) &&
-                  Rewriter.isInsertedInstruction(IUIInst) &&
-                  !isa<PHINode>(IUIInst))
-                UseWorkList.push_back(IUIInst);
-            }
-          } while (!UseWorkList.empty());
-        }
-      }
-    }
-  } while (!Worklist.empty());
-}
-
  /// Return true if it is OK to use SIToFPInst for an inducation variable
  /// with given inital and exit values.
  static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV,
@@ -745,7 +643,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
    BinaryOperator *Incr =
      dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge));
    if (!Incr) return;
-  if (Incr->getOpcode() != Instruction::Add) return;
+  if (Incr->getOpcode() != Instruction::FAdd) return;
    ConstantFP *IncrValue = NULL;
    unsigned IncrVIndex = 1;
    if (Incr->getOperand(1) == PH)
@@ -815,36 +713,39 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
    }
    if (NewPred == CmpInst::BAD_ICMP_PREDICATE) return;
  
+  LLVMContext &Context = PH->getContext();
+
    // Insert new integer induction variable.
    PHINode *NewPHI = PHINode::Create(Type::Int32Ty,
                                      PH->getName()+".int", PH);
-  NewPHI->addIncoming(ConstantInt::get(Type::Int32Ty, newInitValue),
+  NewPHI->addIncoming(Context.getConstantInt(Type::Int32Ty, newInitValue),
                        PH->getIncomingBlock(IncomingEdge));
  
    Value *NewAdd = BinaryOperator::CreateAdd(NewPHI,
-                                            ConstantInt::get(Type::Int32Ty,
+                                          Context.getConstantInt(Type::Int32Ty,
                                                               newIncrValue),
                                              Incr->getName()+".int", Incr);
    NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge));
  
    // The back edge is edge 1 of newPHI, whatever it may have been in the
    // original PHI.
-  ConstantInt *NewEV = ConstantInt::get(Type::Int32Ty, intEV);
+  ConstantInt *NewEV = Context.getConstantInt(Type::Int32Ty, intEV);
    Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV);
    Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1));
-  ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(),
-                                 EC->getParent()->getTerminator());
+  ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(),
+                                 NewPred, LHS, RHS, EC->getNameStart());
  
    // In the following deltions, PH may become dead and may be deleted.
    // Use a WeakVH to observe whether this happens.
    WeakVH WeakPH = PH;
  
    // Delete old, floating point, exit comparision instruction.
+  NewEC->takeName(EC);
    EC->replaceAllUsesWith(NewEC);
    RecursivelyDeleteTriviallyDeadInstructions(EC);
  
    // Delete old, floating point, increment instruction.
-  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+  Incr->replaceAllUsesWith(Context.getUndef(Incr->getType()));
    RecursivelyDeleteTriviallyDeadInstructions(Incr);
  
    // Replace floating induction variable, if it isn't already deleted.