This test case:

[oota-llvm.git] / lib / Transforms / Scalar / LoopStrengthReduce.cpp
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp

index cf3d702d633f25650a3889214e8b96dc73151cf5..14274296de7da2bebf3a69d6370dcf64203a639b 100644 (file)
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -198,7 +198,7 @@ struct Formula {
  
  }
  
-/// DoInitialMatch - Recurrsion helper for InitialMatch.
+/// DoInitialMatch - Recursion helper for InitialMatch.
  static void DoInitialMatch(const SCEV *S, Loop *L,
                             SmallVectorImpl<const SCEV *> &Good,
                             SmallVectorImpl<const SCEV *> &Bad,
@@ -912,7 +912,7 @@ public:
                                        MaxOffset(INT64_MIN),
                                        AllFixupsOutsideLoop(true) {}
  
-  bool InsertFormula(size_t LUIdx, const Formula &F);
+  bool InsertFormula(const Formula &F);
  
    void check() const;
  
@@ -922,7 +922,7 @@ public:
  
  /// InsertFormula - If the given formula has not yet been inserted, add it to
  /// the list, and return true. Return false otherwise.
-bool LSRUse::InsertFormula(size_t LUIdx, const Formula &F) {
+bool LSRUse::InsertFormula(const Formula &F) {
    SmallVector<const SCEV *, 2> Key = F.BaseRegs;
    if (F.ScaledReg) Key.push_back(F.ScaledReg);
    // Unstable sort by host order ok, because this is only used for uniquifying.
@@ -1057,8 +1057,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
                               GlobalValue *BaseGV,
                               bool HasBaseReg,
                               LSRUse::KindType Kind, const Type *AccessTy,
-                             const TargetLowering *TLI,
-                             ScalarEvolution &SE) {
+                             const TargetLowering *TLI) {
    // Fast-path: zero is always foldable.
    if (BaseOffs == 0 && !BaseGV) return true;
  
@@ -1186,7 +1185,7 @@ class LSRInstance {
                                      const Type *AccessTy);
  
  public:
-  void InsertInitialFormula(const SCEV *S, Loop *L, LSRUse &LU, size_t LUIdx);
+  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
    void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
    void CountRegisters(const Formula &F, size_t LUIdx);
    bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
@@ -1217,23 +1216,18 @@ public:
  
    Value *Expand(const LSRFixup &LF,
                  const Formula &F,
-                BasicBlock::iterator IP, Loop *L, Instruction *IVIncInsertPos,
+                BasicBlock::iterator IP,
                  SCEVExpander &Rewriter,
-                SmallVectorImpl<WeakVH> &DeadInsts,
-                ScalarEvolution &SE, DominatorTree &DT) const;
+                SmallVectorImpl<WeakVH> &DeadInsts) const;
    void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
                       const Formula &F,
-                     Loop *L, Instruction *IVIncInsertPos,
                       SCEVExpander &Rewriter,
                       SmallVectorImpl<WeakVH> &DeadInsts,
-                     ScalarEvolution &SE, DominatorTree &DT,
                       Pass *P) const;
    void Rewrite(const LSRFixup &LF,
                 const Formula &F,
-               Loop *L, Instruction *IVIncInsertPos,
                 SCEVExpander &Rewriter,
                 SmallVectorImpl<WeakVH> &DeadInsts,
-               ScalarEvolution &SE, DominatorTree &DT,
                 Pass *P) const;
    void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
                           Pass *P);
@@ -1252,7 +1246,7 @@ public:
  }
  
  /// OptimizeShadowIV - If IV is used in a int-to-float cast
-/// inside the loop then try to eliminate the cast opeation.
+/// inside the loop then try to eliminate the cast operation.
  void LSRInstance::OptimizeShadowIV() {
    const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
    if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
@@ -1655,12 +1649,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
    // Conservatively assume HasBaseReg is true for now.
    if (NewOffset < LU.MinOffset) {
      if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true,
-                          Kind, AccessTy, TLI, SE))
+                          Kind, AccessTy, TLI))
        return false;
      NewMinOffset = NewOffset;
    } else if (NewOffset > LU.MaxOffset) {
      if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true,
-                          Kind, AccessTy, TLI, SE))
+                          Kind, AccessTy, TLI))
        return false;
      NewMaxOffset = NewOffset;
    }
@@ -1679,7 +1673,7 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
  
  /// getUse - Return an LSRUse index and an offset value for a fixup which
  /// needs the given expression, with the given kind and optional access type.
-/// Either reuse an exisitng use or create a new one, as needed.
+/// Either reuse an existing use or create a new one, as needed.
  std::pair<size_t, int64_t>
  LSRInstance::getUse(const SCEV *&Expr,
                      LSRUse::KindType Kind, const Type *AccessTy) {
@@ -1687,8 +1681,7 @@ LSRInstance::getUse(const SCEV *&Expr,
    int64_t Offset = ExtractImmediate(Expr, SE);
  
    // Basic uses can't accept any offset, for example.
-  if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true,
-                        Kind, AccessTy, TLI, SE)) {
+  if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
      Expr = Copy;
      Offset = 0;
    }
@@ -1761,7 +1754,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
          if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
            Factors.insert(Factor->getValue()->getValue().getSExtValue());
        } else if (const SCEVConstant *Factor =
-                   dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride, NewStride,
+                   dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
+                                                               NewStride,
                                                                 SE, true))) {
          if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
            Factors.insert(Factor->getValue()->getValue().getSExtValue());
@@ -1834,7 +1828,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
  
      // If this is the first use of this LSRUse, give it a formula.
      if (LU.Formulae.empty()) {
-      InsertInitialFormula(S, L, LU, LF.LUIdx);
+      InsertInitialFormula(S, LU, LF.LUIdx);
        CountRegisters(LU.Formulae.back(), LF.LUIdx);
      }
    }
@@ -1843,8 +1837,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
  }
  
  void
-LSRInstance::InsertInitialFormula(const SCEV *S, Loop *L,
-                                  LSRUse &LU, size_t LUIdx) {
+LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
    Formula F;
    F.InitialMatch(S, L, SE, DT);
    bool Inserted = InsertFormula(LU, LUIdx, F);
@@ -1874,7 +1867,7 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
  /// InsertFormula - If the given formula has not yet been inserted, add it to
  /// the list, and return true. Return false otherwise.
  bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
-  if (!LU.InsertFormula(LUIdx, F))
+  if (!LU.InsertFormula(F))
      return false;
  
    CountRegisters(F, LUIdx);
@@ -2042,7 +2035,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  /// loop-dominating registers added into a single register.
  void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
                                         Formula Base) {
-  // This method is only intersting on a plurality of registers.
+  // This method is only interesting on a plurality of registers.
    if (Base.BaseRegs.size() <= 1) return;
  
    Formula F = Base;
@@ -2061,7 +2054,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
      const SCEV *Sum = SE.getAddExpr(Ops);
      // TODO: If Sum is zero, it probably means ScalarEvolution missed an
      // opportunity to fold something. For now, just ignore such cases
-    // rather than procede with zero in a register.
+    // rather than proceed with zero in a register.
      if (!Sum->isZero()) {
        F.BaseRegs.push_back(Sum);
        (void)InsertFormula(LU, LUIdx, F);
@@ -2408,7 +2401,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
      const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
      unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
  
-    // TODO: Use a more targetted data structure.
+    // TODO: Use a more targeted data structure.
      for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
        Formula F = LU.Formulae[L];
        // Use the immediate in the scaled register.
@@ -2576,9 +2569,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
          });
  }
  
-/// NarrowSearchSpaceUsingHeuristics - If there are an extrordinary number of
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
  /// formulae to choose from, use some rough heuristics to prune down the number
-/// of formulae. This keeps the main solver from taking an extrordinary amount
+/// of formulae. This keeps the main solver from taking an extraordinary amount
  /// of time in some worst-case scenarios.
  void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
    // This is a rough guess that seems to work fairly well.
@@ -2628,7 +2621,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
      }
  
      DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
-                 << " will yeild profitable reuse.\n");
+                 << " will yield profitable reuse.\n");
      Taken.insert(Best);
  
      // In any use with formulae which references this register, delete formulae
@@ -2675,7 +2668,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
    //    - sort the formula so that the most profitable solutions are found first
    //    - sort the uses too
    //  - search faster:
-  //    - dont compute a cost, and then compare. compare while computing a cost
+  //    - don't compute a cost, and then compare. compare while computing a cost
    //      and bail early.
    //    - track register sets with SmallBitVector
  
@@ -2786,10 +2779,8 @@ static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
  Value *LSRInstance::Expand(const LSRFixup &LF,
                             const Formula &F,
                             BasicBlock::iterator IP,
-                           Loop *L, Instruction *IVIncInsertPos,
                             SCEVExpander &Rewriter,
-                           SmallVectorImpl<WeakVH> &DeadInsts,
-                           ScalarEvolution &SE, DominatorTree &DT) const {
+                           SmallVectorImpl<WeakVH> &DeadInsts) const {
    const LSRUse &LU = Uses[LF.LUIdx];
  
    // Then, collect some instructions which we will remain dominated by when
@@ -2802,8 +2793,12 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
      if (Instruction *I =
            dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
        Inputs.push_back(I);
-  if (LF.PostIncLoop && !L->contains(LF.UserInst))
-    Inputs.push_back(L->getLoopLatch()->getTerminator());
+  if (LF.PostIncLoop) {
+    if (!L->contains(LF.UserInst))
+      Inputs.push_back(L->getLoopLatch()->getTerminator());
+    else
+      Inputs.push_back(IVIncInsertPos);
+  }
  
    // Then, climb up the immediate dominator tree as far as we can go while
    // still being dominated by the input positions.
@@ -2866,8 +2861,10 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
        if (AR->getLoop() == LF.PostIncLoop) {
          Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
          // If the user is inside the loop, insert the code after the increment
-        // so that it is dominated by its operand.
-        if (L->contains(LF.UserInst))
+        // so that it is dominated by its operand. If the original insert point
+        // was already dominated by the increment, keep it, because there may
+        // be loop-variant operands that need to be respected also.
+        if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP))
            IP = IVIncInsertPos;
          break;
        }
@@ -2977,10 +2974,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
  void LSRInstance::RewriteForPHI(PHINode *PN,
                                  const LSRFixup &LF,
                                  const Formula &F,
-                                Loop *L, Instruction *IVIncInsertPos,
                                  SCEVExpander &Rewriter,
                                  SmallVectorImpl<WeakVH> &DeadInsts,
-                                ScalarEvolution &SE, DominatorTree &DT,
                                  Pass *P) const {
    DenseMap<BasicBlock *, Value *> Inserted;
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
@@ -3014,8 +3009,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
        if (!Pair.second)
          PN->setIncomingValue(i, Pair.first->second);
        else {
-        Value *FullV = Expand(LF, F, BB->getTerminator(), L, IVIncInsertPos,
-                              Rewriter, DeadInsts, SE, DT);
+        Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
  
          // If this is reuse-by-noop-cast, insert the noop cast.
          const Type *OpTy = LF.OperandValToReplace->getType();
@@ -3037,18 +3031,15 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
  /// the newly expanded value.
  void LSRInstance::Rewrite(const LSRFixup &LF,
                            const Formula &F,
-                          Loop *L, Instruction *IVIncInsertPos,
                            SCEVExpander &Rewriter,
                            SmallVectorImpl<WeakVH> &DeadInsts,
-                          ScalarEvolution &SE, DominatorTree &DT,
                            Pass *P) const {
    // First, find an insertion point that dominates UserInst. For PHI nodes,
    // find the nearest block which dominates all the relevant uses.
    if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
-    RewriteForPHI(PN, LF, F, L, IVIncInsertPos, Rewriter, DeadInsts, SE, DT, P);
+    RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
    } else {
-    Value *FullV = Expand(LF, F, LF.UserInst, L, IVIncInsertPos,
-                          Rewriter, DeadInsts, SE, DT);
+    Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
  
      // If this is reuse-by-noop-cast, insert the noop cast.
      const Type *OpTy = LF.OperandValToReplace->getType();
@@ -3088,8 +3079,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
    for (size_t i = 0, e = Fixups.size(); i != e; ++i) {
      size_t LUIdx = Fixups[i].LUIdx;
  
-    Rewrite(Fixups[i], *Solution[LUIdx], L, IVIncInsertPos, Rewriter,
-            DeadInsts, SE, DT, P);
+    Rewrite(Fixups[i], *Solution[LUIdx], Rewriter, DeadInsts, P);
  
      Changed = true;
    }
@@ -3118,7 +3108,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
          dbgs() << ":\n");
  
    /// OptimizeShadowIV - If IV is used in a int-to-float cast
-  /// inside the loop then try to eliminate the cast opeation.
+  /// inside the loop then try to eliminate the cast operation.
    OptimizeShadowIV();
  
    // Change loop terminating condition to use the postinc iv when possible.