Pass the right sign to TLI->isLegalICmpImmediate.

[oota-llvm.git] / lib / Transforms / Scalar / LoopStrengthReduce.cpp
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp

index 85f1389fe31934d5a34515672b5b8262000a0323..d57ec22f44ab512746a3e16d094ab4eeb67e135a 100644 (file)
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -77,12 +77,6 @@
  #include <algorithm>
  using namespace llvm;
  
-static cl::opt<bool> EnableNested(
-  "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));
-
-static cl::opt<bool> EnableRetry(
-  "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
-
  // Temporary flag to cleanup congruent phis after LSR phi expansion.
  // It's currently disabled until we can determine whether it's truly useful or
  // not. The flag should be removed after the v3.0 release.
@@ -710,8 +704,9 @@ static bool isHighCostExpansion(const SCEV *S,
          Value *UVal = U->getValue();
          for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end();
               UI != UE; ++UI) {
-          Instruction *User = cast<Instruction>(*UI);
-          if (User->getOpcode() == Instruction::Mul
+          // If U is a constant, it may be used by a ConstantExpr.
+          Instruction *User = dyn_cast<Instruction>(*UI);
+          if (User && User->getOpcode() == Instruction::Mul
                && SE.isSCEVable(User->getType())) {
              return SE.getSCEV(User) == Mul;
            }
@@ -824,36 +819,20 @@ void Cost::RateRegister(const SCEV *Reg,
                          const Loop *L,
                          ScalarEvolution &SE, DominatorTree &DT) {
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
-    if (AR->getLoop() == L)
-      AddRecCost += 1; /// TODO: This should be a function of the stride.
-
      // If this is an addrec for another loop, don't second-guess its addrec phi
      // nodes. LSR isn't currently smart enough to reason about more than one
-    // loop at a time. LSR has either already run on inner loops, will not run
-    // on other loops, and cannot be expected to change sibling loops. If the
-    // AddRec exists, consider it's register free and leave it alone. Otherwise,
-    // do not consider this formula at all.
-    else if (!EnableNested || L->contains(AR->getLoop()) ||
-             (!AR->getLoop()->contains(L) &&
-              DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
+    // loop at a time. LSR has already run on inner loops, will not run on outer
+    // loops, and cannot be expected to change sibling loops.
+    if (AR->getLoop() != L) {
+      // If the AddRec exists, consider its register free and leave it alone.
        if (isExistingPhi(AR, SE))
          return;
  
-      // For !EnableNested, never rewrite IVs in other loops.
-      if (!EnableNested) {
-        Loose();
-        return;
-      }
-      // If this isn't one of the addrecs that the loop already has, it
-      // would require a costly new phi and add. TODO: This isn't
-      // precisely modeled right now.
-      ++NumBaseAdds;
-      if (!Regs.count(AR->getStart())) {
-        RateRegister(AR->getStart(), Regs, L, SE, DT);
-        if (isLoser())
-          return;
-      }
+      // Otherwise, do not consider this formula at all.
+      Loose();
+      return;
      }
+    AddRecCost += 1; /// TODO: This should be a function of the stride.
  
      // Add the step value register, if it needs one.
      // TODO: The non-affine case isn't precisely modeled here.
@@ -1303,10 +1282,19 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
      // If we have low-level target information, ask the target if it can fold an
      // integer immediate on an icmp.
      if (AM.BaseOffs != 0) {
-      if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs);
-      return false;
+      if (!TLI)
+        return false;
+      // We have one of:
+      // ICmpZero     BaseReg + Offset => ICmp BaseReg, -Offset
+      // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+      // Offs is the ICmp immediate.
+      int64_t Offs = AM.BaseOffs;
+      if (AM.Scale == 0)
+        Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
+      return TLI->isLegalICmpImmediate(Offs);
      }
  
+    // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
      return true;
  
    case LSRUse::Basic:
@@ -1318,7 +1306,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
      return AM.Scale == 0 || AM.Scale == -1;
    }
  
-  return false;
+  llvm_unreachable("Invalid LSRUse Kind!");
  }
  
  static bool isLegalUse(TargetLowering::AddrMode AM,
@@ -1573,9 +1561,11 @@ class LSRInstance {
    BasicBlock::iterator
      HoistInsertPosition(BasicBlock::iterator IP,
                          const SmallVectorImpl<Instruction *> &Inputs) const;
-  BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
-                                                     const LSRFixup &LF,
-                                                     const LSRUse &LU) const;
+  BasicBlock::iterator
+    AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+                                  const LSRFixup &LF,
+                                  const LSRUse &LU,
+                                  SCEVExpander &Rewriter) const;
  
    Value *Expand(const LSRFixup &LF,
                  const Formula &F,
@@ -2191,7 +2181,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
      do {
        const SCEV *S = Worklist.pop_back_val();
        if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
-        if (EnableNested || AR->getLoop() == L)
+        if (AR->getLoop() == L)
            Strides.insert(AR->getStepRecurrence(SE));
          Worklist.push_back(AR->getStart());
        } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
@@ -2461,7 +2451,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
      if (!isCompatibleIVType(PrevIV, NextIV))
        continue;
  
-    // A phi nodes terminates a chain.
+    // A phi node terminates a chain.
      if (isa<PHINode>(UserInst)
          && isa<PHINode>(IVChainVec[ChainIdx].back().UserInst))
        continue;
@@ -2482,11 +2472,15 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
        DEBUG(dbgs() << "IV Chain Limit\n");
        return;
      }
+    LastIncExpr = SE.getSCEV(NextIV);
+    // IVUsers may have skipped over sign/zero extensions. We don't currently
+    // attempt to form chains involving extensions unless they can be hoisted
+    // into this loop's AddRec.
+    if (!isa<SCEVAddRecExpr>(LastIncExpr))
+      return;
      ++NChains;
      IVChainVec.resize(NChains);
      ChainUsersVec.resize(NChains);
-    LastIncExpr = SE.getSCEV(NextIV);
-    assert(isa<SCEVAddRecExpr>(LastIncExpr) && "expect recurrence at IV user");
      DEBUG(dbgs() << "IV Head: (" << *UserInst << ") IV=" << *LastIncExpr
            << "\n");
    }
@@ -2513,13 +2507,14 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
    for (Value::use_iterator UseIter = IVOper->use_begin(),
           UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
      Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
+    if (!OtherUse || OtherUse == UserInst)
+      continue;
      if (SE.isSCEVable(OtherUse->getType())
          && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
          && IU.isIVUserOrOperand(OtherUse)) {
        continue;
      }
-    if (OtherUse && OtherUse != UserInst)
-      NearUsers.insert(OtherUse);
+    NearUsers.insert(OtherUse);
    }
  
    // Since this user is part of the chain, it's no longer considered a use
@@ -3980,24 +3975,29 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
      if (LU.Regs.count(*I))
        ReqRegs.insert(*I);
  
-  bool AnySatisfiedReqRegs = false;
    SmallPtrSet<const SCEV *, 16> NewRegs;
    Cost NewCost;
-retry:
    for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
         E = LU.Formulae.end(); I != E; ++I) {
      const Formula &F = *I;
  
      // Ignore formulae which do not use any of the required registers.
+    bool SatisfiedReqReg = true;
      for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
           JE = ReqRegs.end(); J != JE; ++J) {
        const SCEV *Reg = *J;
        if ((!F.ScaledReg || F.ScaledReg != Reg) &&
            std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
-          F.BaseRegs.end())
-        goto skip;
+          F.BaseRegs.end()) {
+        SatisfiedReqReg = false;
+        break;
+      }
+    }
+    if (!SatisfiedReqReg) {
+      // If none of the formulae satisfied the required registers, then we could
+      // clear ReqRegs and try again. Currently, we simply give up in this case.
+      continue;
      }
-    AnySatisfiedReqRegs = true;
  
      // Evaluate the cost of the current formula. If it's already worse than
      // the current best, prune the search at that point.
@@ -4024,18 +4024,6 @@ retry:
        }
        Workspace.pop_back();
      }
-  skip:;
-  }
-
-  if (!EnableRetry && !AnySatisfiedReqRegs)
-    return;
-
-  // If none of the formulae had all of the required registers, relax the
-  // constraint so that we don't exclude all formulae.
-  if (!AnySatisfiedReqRegs) {
-    assert(!ReqRegs.empty() && "Solver failed even without required registers");
-    ReqRegs.clear();
-    goto retry;
    }
  }
  
@@ -4131,9 +4119,10 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
  /// AdjustInsertPositionForExpand - Determine an input position which will be
  /// dominated by the operands and which will dominate the result.
  BasicBlock::iterator
-LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
                                             const LSRFixup &LF,
-                                           const LSRUse &LU) const {
+                                           const LSRUse &LU,
+                                           SCEVExpander &Rewriter) const {
    // Collect some instructions which must be dominated by the
    // expanding replacement. These must be dominated by any operands that
    // will be required in the expansion.
@@ -4168,9 +4157,13 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
      }
    }
  
+  assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
+         && !isa<DbgInfoIntrinsic>(LowestIP) &&
+         "Insertion point must be a normal instruction");
+
    // Then, climb up the immediate dominator tree as far as we can go while
    // still being dominated by the input positions.
-  IP = HoistInsertPosition(IP, Inputs);
+  BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
  
    // Don't insert instructions before PHI nodes.
    while (isa<PHINode>(IP)) ++IP;
@@ -4181,6 +4174,11 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
    // Ignore debug intrinsics.
    while (isa<DbgInfoIntrinsic>(IP)) ++IP;
  
+  // Set IP below instructions recently inserted by SCEVExpander. This keeps the
+  // IP consistent across expansions and allows the previously inserted
+  // instructions to be reused by subsequent expansion.
+  while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
+
    return IP;
  }
  
@@ -4195,7 +4193,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
  
    // Determine an input position which will be dominated by the operands and
    // which will dominate the result.
-  IP = AdjustInsertPositionForExpand(IP, LF, LU);
+  IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
  
    // Inform the Rewriter if we have a post-increment use, so that it can
    // perform an advantageous expansion.
@@ -4518,15 +4516,26 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
    if (!L->isLoopSimplifyForm())
      return;
  
-  // All outer loops must have preheaders, or SCEVExpander may not be able to
-  // materialize an AddRecExpr whose Start is an outer AddRecExpr.
-  for (const Loop *OuterLoop = L; (OuterLoop = OuterLoop->getParentLoop());) {
-    if (!OuterLoop->getLoopPreheader())
-      return;
-  }
    // If there's no interesting work to be done, bail early.
    if (IU.empty()) return;
  
+#ifndef NDEBUG
+  // All dominating loops must have preheaders, or SCEVExpander may not be able
+  // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
+  //
+  // IVUsers analysis should only create users that are dominated by simple loop
+  // headers. Since this loop should dominate all of its users, its user list
+  // should be empty if this loop itself is not within a simple loop nest.
+  for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
+       Rung; Rung = Rung->getIDom()) {
+    BasicBlock *BB = Rung->getBlock();
+    const Loop *DomLoop = LI.getLoopFor(BB);
+    if (DomLoop && DomLoop->getHeader() == BB) {
+      assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
+    }
+  }
+#endif // NDEBUG
+
    DEBUG(dbgs() << "\nLSR on loop ";
          WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
          dbgs() << ":\n");
@@ -4539,7 +4548,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
    if (IU.empty()) return;
  
    // Skip nested loops until we can model them better with formulae.
-  if (!EnableNested && !L->empty()) {
+  if (!L->empty()) {
      DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
      return;
    }