Switch the SCEV expander and LoopStrengthReduce to use TargetTransformInfo rather than TargetLowering

author Chandler Carruth <chandlerc@gmail.com>

Mon, 7 Jan 2013 14:41:08 +0000 (14:41 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Mon, 7 Jan 2013 14:41:08 +0000 (14:41 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Mon, 7 Jan 2013 14:41:08 +0000 (14:41 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Mon, 7 Jan 2013 14:41:08 +0000 (14:41 +0000)
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h

index 9ee3a97bc70df3a138f85373e64a0723c3a81b15..ea45aff09be6b41569508daf4c1b2595f4948a7a 100644 (file)
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -22,7 +22,7 @@
  #include <set>
  
  namespace llvm {
-  class TargetLowering;
+  class TargetTransformInfo;
  
    /// Return true if the given expression is safe to expand in the sense that
    /// all materialized values are safe to speculate.
@@ -129,7 +129,7 @@ namespace llvm {
      /// representative. Return the number of phis eliminated.
      unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
                                   SmallVectorImpl<WeakVH> &DeadInsts,
-                                 const TargetLowering *TLI = NULL);
+                                 const TargetTransformInfo *TTI = NULL);
  
      /// expandCodeFor - Insert code to directly compute the specified SCEV
      /// expression into the program.  The inserted code is inserted into the
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h

index a5d8eed7462205a4129c250636ad0e8d5fe64910..d465127ba0a29a4fe92f31a591118872d42534da 100644 (file)
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -115,11 +115,9 @@ Pass *createLICMPass();
  //===----------------------------------------------------------------------===//
  //
  // LoopStrengthReduce - This pass is strength reduces GEP instructions that use
-// a loop's canonical induction variable as one of their indices.  It takes an
-// optional parameter used to consult the target machine whether certain
-// transformations are profitable.
+// a loop's canonical induction variable as one of their indices.
  //
-Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0);
+Pass *createLoopStrengthReducePass();
  
  Pass *createGlobalMergePass(const TargetLowering *TLI = 0);
  
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp

index 5f0ba2e0b85576f4ba1d02bbe502e6661883c8c5..b87ad75389775473ef5cdd1659159eee0fc1cb06 100644 (file)
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -16,11 +16,11 @@
  #include "llvm/Analysis/ScalarEvolutionExpander.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
  
  using namespace llvm;
  
@@ -1600,14 +1600,14 @@ static bool width_descending(Value *lhs, Value *rhs) {
  /// the same context that SCEVExpander is used.
  unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
                                             SmallVectorImpl<WeakVH> &DeadInsts,
-                                           const TargetLowering *TLI) {
+                                           const TargetTransformInfo *TTI) {
    // Find integer phis in order of increasing width.
    SmallVector<PHINode*, 8> Phis;
    for (BasicBlock::iterator I = L->getHeader()->begin();
         PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
      Phis.push_back(Phi);
    }
-  if (TLI)
+  if (TTI)
      std::sort(Phis.begin(), Phis.end(), width_descending);
  
    unsigned NumElim = 0;
@@ -1635,8 +1635,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
      PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
      if (!OrigPhiRef) {
        OrigPhiRef = Phi;
-      if (Phi->getType()->isIntegerTy() && TLI
-          && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+      if (Phi->getType()->isIntegerTy() && TTI
+          && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
          // This phi can be freely truncated to the narrowest phi type. Map the
          // truncated expression to it so it will be reused for narrow types.
          const SCEV *TruncExpr =
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp

index 029937397e1326bdedf76479c0ad0b993fb93cdc..63f495a430526107fa4a849d1753332f218a0953 100644 (file)
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -179,7 +179,9 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
  
    bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                               bool HasBaseReg, int64_t Scale) const {
-    return false;
+    // Guess that reg+reg addressing is allowed. This heuristic is taken from
+    // the implementation of LSR.
+    return !BaseGV && BaseOffset == 0 && Scale <= 1;
    }
  
    bool isTruncateFree(Type *Ty1, Type *Ty2) const {
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp

index 02ac836337aa9ebbf07b34ebfd7e134487af49c8..2a135bcc1e1fcb07c5c5e1da350f40e231504546 100644 (file)
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -362,7 +362,7 @@ void TargetPassConfig::addIRPasses() {
  
    // Run loop strength reduction before anything else.
    if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
-    addPass(createLoopStrengthReducePass(getTargetLowering()));
+    addPass(createLoopStrengthReducePass());
      if (PrintLSR)
        addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
    }
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp

index 70ccc5431e3895ebce18ac42bab1aefb4e1c7e4d..c1b881fc1c461ae0b74e6650bd9ccc4e357891ce 100644 (file)
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -37,8 +37,8 @@
  //
  // TODO: Handle multiple loops at a time.
  //
-// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
-//       instead of a GlobalValue?
+// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
+//       of a GlobalValue?
  //
  // TODO: When truncation is free, truncate ICmp users' operands to make it a
  //       smaller encoding (on x86 at least).
@@ -63,6 +63,7 @@
  #include "llvm/Analysis/IVUsers.h"
  #include "llvm/Analysis/LoopPass.h"
  #include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/Assembly/Writer.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DerivedTypes.h"
@@ -72,7 +73,6 @@
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ValueHandle.h"
  #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  #include "llvm/Transforms/Utils/Local.h"
  #include <algorithm>
@@ -1270,46 +1270,42 @@ void LSRUse::dump() const {
  /// isLegalUse - Test whether the use described by AM is "legal", meaning it can
  /// be completely folded into the user instruction at isel time. This includes
  /// address-mode folding and special icmp tricks.
-static bool isLegalUse(const AddrMode &AM,
-                       LSRUse::KindType Kind, Type *AccessTy,
-                       const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
+                       Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
+                       bool HasBaseReg, int64_t Scale) {
    switch (Kind) {
    case LSRUse::Address:
-    // If we have low-level target information, ask the target if it can
-    // completely fold this address.
-    if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
+    return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
  
      // Otherwise, just guess that reg+reg addressing is legal.
-    return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
+    //return ;
  
    case LSRUse::ICmpZero:
      // There's not even a target hook for querying whether it would be legal to
      // fold a GV into an ICmp.
-    if (AM.BaseGV)
+    if (BaseGV)
        return false;
  
      // ICmp only has two operands; don't allow more than two non-trivial parts.
-    if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
+    if (Scale != 0 && HasBaseReg && BaseOffset != 0)
        return false;
  
      // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
      // putting the scaled register in the other operand of the icmp.
-    if (AM.Scale != 0 && AM.Scale != -1)
+    if (Scale != 0 && Scale != -1)
        return false;
  
      // If we have low-level target information, ask the target if it can fold an
      // integer immediate on an icmp.
-    if (AM.BaseOffs != 0) {
-      if (!TLI)
-        return false;
+    if (BaseOffset != 0) {
        // We have one of:
-      // ICmpZero     BaseReg + Offset => ICmp BaseReg, -Offset
-      // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+      // ICmpZero     BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
+      // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
        // Offs is the ICmp immediate.
-      int64_t Offs = AM.BaseOffs;
-      if (AM.Scale == 0)
-        Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
-      return TLI->isLegalICmpImmediate(Offs);
+      if (Scale == 0)
+        // The cast does the right thing with INT64_MIN.
+        BaseOffset = -(uint64_t)BaseOffset;
+      return TTI.isLegalICmpImmediate(BaseOffset);
      }
  
      // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
@@ -1317,92 +1313,87 @@ static bool isLegalUse(const AddrMode &AM,
  
    case LSRUse::Basic:
      // Only handle single-register values.
-    return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
+    return !BaseGV && Scale == 0 && BaseOffset == 0;
  
    case LSRUse::Special:
      // Special case Basic to handle -1 scales.
-    return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
+    return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
    }
  
    llvm_unreachable("Invalid LSRUse Kind!");
  }
  
-static bool isLegalUse(AddrMode AM,
-                       int64_t MinOffset, int64_t MaxOffset,
-                       LSRUse::KindType Kind, Type *AccessTy,
-                       const TargetLowering *TLI) {
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+                       int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+                       GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+                       int64_t Scale) {
    // Check for overflow.
-  if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
+  if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
        (MinOffset > 0))
      return false;
-  AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
-  if (isLegalUse(AM, Kind, AccessTy, TLI)) {
-    AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
-    // Check for overflow.
-    if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
-        (MaxOffset > 0))
-      return false;
-    AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
-    return isLegalUse(AM, Kind, AccessTy, TLI);
-  }
-  return false;
+  MinOffset = (uint64_t)BaseOffset + MinOffset;
+  if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
+      (MaxOffset > 0))
+    return false;
+  MaxOffset = (uint64_t)BaseOffset + MaxOffset;
+
+  return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
+                    Scale) &&
+         isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
  }
  
-static bool isAlwaysFoldable(int64_t BaseOffs,
-                             GlobalValue *BaseGV,
-                             bool HasBaseReg,
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+                       int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+                       const Formula &F) {
+  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.AM.BaseGV,
+                    F.AM.BaseOffs, F.AM.HasBaseReg, F.AM.Scale);
+}
+
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
                               LSRUse::KindType Kind, Type *AccessTy,
-                             const TargetLowering *TLI) {
+                             GlobalValue *BaseGV, int64_t BaseOffset,
+                             bool HasBaseReg) {
    // Fast-path: zero is always foldable.
-  if (BaseOffs == 0 && !BaseGV) return true;
+  if (BaseOffset == 0 && !BaseGV) return true;
  
    // Conservatively, create an address with an immediate and a
    // base and a scale.
-  AddrMode AM;
-  AM.BaseOffs = BaseOffs;
-  AM.BaseGV = BaseGV;
-  AM.HasBaseReg = HasBaseReg;
-  AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
  
    // Canonicalize a scale of 1 to a base register if the formula doesn't
    // already have a base register.
-  if (!AM.HasBaseReg && AM.Scale == 1) {
-    AM.Scale = 0;
-    AM.HasBaseReg = true;
+  if (!HasBaseReg && Scale == 1) {
+    Scale = 0;
+    HasBaseReg = true;
    }
  
-  return isLegalUse(AM, Kind, AccessTy, TLI);
+  return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
  }
  
-static bool isAlwaysFoldable(const SCEV *S,
-                             int64_t MinOffset, int64_t MaxOffset,
-                             bool HasBaseReg,
-                             LSRUse::KindType Kind, Type *AccessTy,
-                             const TargetLowering *TLI,
-                             ScalarEvolution &SE) {
+static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
+                             ScalarEvolution &SE, int64_t MinOffset,
+                             int64_t MaxOffset, LSRUse::KindType Kind,
+                             Type *AccessTy, const SCEV *S, bool HasBaseReg) {
    // Fast-path: zero is always foldable.
    if (S->isZero()) return true;
  
    // Conservatively, create an address with an immediate and a
    // base and a scale.
-  int64_t BaseOffs = ExtractImmediate(S, SE);
+  int64_t BaseOffset = ExtractImmediate(S, SE);
    GlobalValue *BaseGV = ExtractSymbol(S, SE);
  
    // If there's anything else involved, it's not foldable.
    if (!S->isZero()) return false;
  
    // Fast-path: zero is always foldable.
-  if (BaseOffs == 0 && !BaseGV) return true;
+  if (BaseOffset == 0 && !BaseGV) return true;
  
    // Conservatively, create an address with an immediate and a
    // base and a scale.
-  AddrMode AM;
-  AM.BaseOffs = BaseOffs;
-  AM.BaseGV = BaseGV;
-  AM.HasBaseReg = HasBaseReg;
-  AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+  int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
  
-  return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+  return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+                    BaseOffset, HasBaseReg, Scale);
  }
  
  namespace {
@@ -1502,7 +1493,7 @@ class LSRInstance {
    ScalarEvolution &SE;
    DominatorTree &DT;
    LoopInfo &LI;
-  const TargetLowering *const TLI;
+  const TargetTransformInfo &TTI;
    Loop *const L;
    bool Changed;
  
@@ -1638,7 +1629,7 @@ class LSRInstance {
                           Pass *P);
  
  public:
-  LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+  LSRInstance(Loop *L, Pass *P);
  
    bool getChanged() const { return Changed; }
  
@@ -1688,12 +1679,9 @@ void LSRInstance::OptimizeShadowIV() {
      }
      if (!DestTy) continue;
  
-    if (TLI) {
-      // If target does not support DestTy natively then do not apply
-      // this transformation.
-      EVT DVT = TLI->getValueType(DestTy);
-      if (!TLI->isTypeLegal(DVT)) continue;
-    }
+    // If target does not support DestTy natively then do not apply
+    // this transformation.
+    if (!TTI.isTypeLegal(DestTy)) continue;
  
      PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
      if (!PH) continue;
@@ -2015,18 +2003,17 @@ LSRInstance::OptimizeLoopTermCond() {
              if (C->getValue().getMinSignedBits() >= 64 ||
                  C->getValue().isMinSignedValue())
                goto decline_post_inc;
-            // Without TLI, assume that any stride might be valid, and so any
-            // use might be shared.
-            if (!TLI)
-              goto decline_post_inc;
              // Check for possible scaled-address reuse.
              Type *AccessTy = getAccessType(UI->getUser());
-            AddrMode AM;
-            AM.Scale = C->getSExtValue();
-            if (TLI->isLegalAddressingMode(AM, AccessTy))
+            int64_t Scale = C->getSExtValue();
+            if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+                                          /*BaseOffset=*/ 0,
+                                          /*HasBaseReg=*/ false, Scale))
                goto decline_post_inc;
-            AM.Scale = -AM.Scale;
-            if (TLI->isLegalAddressingMode(AM, AccessTy))
+            Scale = -Scale;
+            if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+                                          /*BaseOffset=*/ 0,
+                                          /*HasBaseReg=*/ false, Scale))
                goto decline_post_inc;
            }
          }
@@ -2096,13 +2083,13 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
      return false;
    // Conservatively assume HasBaseReg is true for now.
    if (NewOffset < LU.MinOffset) {
-    if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
-                          Kind, AccessTy, TLI))
+    if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+                          LU.MaxOffset - NewOffset, HasBaseReg))
        return false;
      NewMinOffset = NewOffset;
    } else if (NewOffset > LU.MaxOffset) {
-    if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
-                          Kind, AccessTy, TLI))
+    if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+                          NewOffset - LU.MinOffset, HasBaseReg))
        return false;
      NewMaxOffset = NewOffset;
    }
@@ -2131,7 +2118,8 @@ LSRInstance::getUse(const SCEV *&Expr,
    int64_t Offset = ExtractImmediate(Expr, SE);
  
    // Basic uses can't accept any offset, for example.
-  if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
+  if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+                        Offset, /*HasBaseReg=*/ true)) {
      Expr = Copy;
      Offset = 0;
    }
@@ -2396,7 +2384,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
  /// TODO: Consider IVInc free if it's already used in another chains.
  static bool
  isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
-                  ScalarEvolution &SE, const TargetLowering *TLI) {
+                  ScalarEvolution &SE, const TargetTransformInfo &TTI) {
    if (StressIVChain)
      return true;
  
@@ -2654,7 +2642,7 @@ void LSRInstance::CollectChains() {
    for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
         UsersIdx < NChains; ++UsersIdx) {
      if (!isProfitableChain(IVChainVec[UsersIdx],
-                           ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
+                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
        continue;
      // Preserve the chain at UsesIdx.
      if (ChainIdx != UsersIdx)
@@ -2681,7 +2669,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
  
  /// Return true if the IVInc can be folded into an addressing mode.
  static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
-                             Value *Operand, const TargetLowering *TLI) {
+                             Value *Operand, const TargetTransformInfo &TTI) {
    const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
    if (!IncConst || !isAddressUse(UserInst, Operand))
      return false;
@@ -2690,8 +2678,9 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
      return false;
  
    int64_t IncOffset = IncConst->getValue()->getSExtValue();
-  if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false,
-                       LSRUse::Address, getAccessType(UserInst), TLI))
+  if (!isAlwaysFoldable(TTI, LSRUse::Address,
+                        getAccessType(UserInst), /*BaseGV=*/ 0,
+                        IncOffset, /*HaseBaseReg=*/ false))
      return false;
  
    return true;
@@ -2762,7 +2751,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
  
        // If an IV increment can't be folded, use it as the next IV value.
        if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
-                            TLI)) {
+                            TTI)) {
          assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
          IVSrc = IVOper;
          LeftOverExpr = 0;
@@ -3106,9 +3095,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  
        // Don't pull a constant into a register if the constant could be folded
        // into an immediate field.
-      if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
-                           Base.getNumRegs() > 1,
-                           LU.Kind, LU.AccessTy, TLI, SE))
+      if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+                           LU.AccessTy, *J, Base.getNumRegs() > 1))
          continue;
  
        // Collect all operands except *J.
@@ -3120,9 +3108,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
        // Don't leave just a constant behind in a register if the constant could
        // be folded into an immediate field.
        if (InnerAddOps.size() == 1 &&
-          isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
-                           Base.getNumRegs() > 1,
-                           LU.Kind, LU.AccessTy, TLI, SE))
+          isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+                           LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
          continue;
  
        const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
@@ -3132,10 +3119,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  
        // Add the remaining pieces of the add back into the new formula.
        const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
-      if (TLI && InnerSumSC &&
+      if (InnerSumSC &&
            SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
-          TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
-                                   InnerSumSC->getValue()->getZExtValue())) {
+          TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+                                  InnerSumSC->getValue()->getZExtValue())) {
          F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
                             InnerSumSC->getValue()->getZExtValue();
          F.BaseRegs.erase(F.BaseRegs.begin() + i);
@@ -3144,9 +3131,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
  
        // Add J as its own register, or an unfolded immediate.
        const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
-      if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
-          TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
-                                   SC->getValue()->getZExtValue()))
+      if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+          TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+                                  SC->getValue()->getZExtValue()))
          F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
                             SC->getValue()->getZExtValue();
        else
@@ -3204,8 +3191,7 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
        continue;
      Formula F = Base;
      F.AM.BaseGV = GV;
-    if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
-                    LU.Kind, LU.AccessTy, TLI))
+    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
        continue;
      F.BaseRegs[i] = G;
      (void)InsertFormula(LU, LUIdx, F);
@@ -3229,8 +3215,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
           E = Worklist.end(); I != E; ++I) {
        Formula F = Base;
        F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
-      if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
-                     LU.Kind, LU.AccessTy, TLI)) {
+      if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+                     LU.AccessTy, F)) {
          // Add the offset to the base register.
          const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
          // If it cancelled out, drop the base register, otherwise update it.
@@ -3249,8 +3235,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
        continue;
      Formula F = Base;
      F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
-    if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
-                    LU.Kind, LU.AccessTy, TLI))
+    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
        continue;
      F.BaseRegs[i] = G;
      (void)InsertFormula(LU, LUIdx, F);
@@ -3297,7 +3282,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
      F.AM.BaseOffs = NewBaseOffs;
  
      // Check that this scale is legal.
-    if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
+    if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
        continue;
  
      // Compensate for the use having MinOffset built into it.
@@ -3352,13 +3337,13 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
      Base.AM.Scale = Factor;
      Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
      // Check whether this scale is going to be legal.
-    if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
-                    LU.Kind, LU.AccessTy, TLI)) {
+    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+                    Base)) {
        // As a special-case, handle special out-of-loop Basic users specially.
        // TODO: Reconsider this special case.
        if (LU.Kind == LSRUse::Basic &&
-          isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
-                     LSRUse::Special, LU.AccessTy, TLI) &&
+          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
+                     LU.AccessTy, Base) &&
            LU.AllFixupsOutsideLoop)
          LU.Kind = LSRUse::Special;
        else
@@ -3391,9 +3376,6 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  
  /// GenerateTruncates - Generate reuse formulae from different IV types.
  void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
-  // This requires TargetLowering to tell us which truncates are free.
-  if (!TLI) return;
-
    // Don't bother truncating symbolic values.
    if (Base.AM.BaseGV) return;
  
@@ -3405,7 +3387,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
    for (SmallSetVector<Type *, 4>::const_iterator
         I = Types.begin(), E = Types.end(); I != E; ++I) {
      Type *SrcTy = *I;
-    if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+    if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
        Formula F = Base;
  
        if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
@@ -3560,8 +3542,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
            continue;
          Formula NewF = F;
          NewF.AM.BaseOffs = Offs;
-        if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
-                        LU.Kind, LU.AccessTy, TLI))
+        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+                        NewF))
            continue;
          NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
  
@@ -3585,10 +3567,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
              continue;
            Formula NewF = F;
            NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
-          if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
-                          LU.Kind, LU.AccessTy, TLI)) {
-            if (!TLI ||
-                !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+          if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
+                          LU.Kind, LU.AccessTy, NewF)) {
+            if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
                continue;
              NewF = F;
              NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
@@ -3898,9 +3879,8 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
                bool Any = false;
                for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
                  Formula &F = LUThatHas->Formulae[i];
-                if (!isLegalUse(F.AM,
-                                LUThatHas->MinOffset, LUThatHas->MaxOffset,
-                                LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
+                if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
+                                LUThatHas->Kind, LUThatHas->AccessTy, F)) {
                    DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                          dbgs() << '\n');
                    LUThatHas->DeleteFormula(F);
@@ -4589,13 +4569,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
    Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
  }
  
-LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
-  : IU(P->getAnalysis<IVUsers>()),
-    SE(P->getAnalysis<ScalarEvolution>()),
-    DT(P->getAnalysis<DominatorTree>()),
-    LI(P->getAnalysis<LoopInfo>()),
-    TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
-
+LSRInstance::LSRInstance(Loop *L, Pass *P)
+    : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
+      DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
+      TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
+      IVIncInsertPos(0) {
    // If LoopSimplify form is not available, stay out of trouble.
    if (!L->isLoopSimplifyForm())
      return;
@@ -4678,14 +4656,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
  
  #ifndef NDEBUG
    // Formulae should be legal.
-  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
-       E = Uses.end(); I != E; ++I) {
-     const LSRUse &LU = *I;
-     for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
-          JE = LU.Formulae.end(); J != JE; ++J)
-        assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
-                          LU.Kind, LU.AccessTy, TLI) &&
-               "Illegal formula generated!");
+  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
+       I != E; ++I) {
+    const LSRUse &LU = *I;
+    for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+                                                  JE = LU.Formulae.end();
+         J != JE; ++J)
+      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
+                        *J) && "Illegal formula generated!");
    };
  #endif
  
@@ -4757,13 +4735,9 @@ void LSRInstance::dump() const {
  namespace {
  
  class LoopStrengthReduce : public LoopPass {
-  /// TLI - Keep a pointer of a TargetLowering to consult for determining
-  /// transformation profitability.
-  const TargetLowering *const TLI;
-
  public:
    static char ID; // Pass ID, replacement for typeid
-  explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+  LoopStrengthReduce();
  
  private:
    bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -4775,6 +4749,7 @@ private:
  char LoopStrengthReduce::ID = 0;
  INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                  "Loop Strength Reduction", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
  INITIALIZE_PASS_DEPENDENCY(DominatorTree)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
  INITIALIZE_PASS_DEPENDENCY(IVUsers)
@@ -4784,14 +4759,13 @@ INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                  "Loop Strength Reduction", false, false)
  
  
-Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
-  return new LoopStrengthReduce(TLI);
+Pass *llvm::createLoopStrengthReducePass() {
+  return new LoopStrengthReduce();
  }
  
-LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
-  : LoopPass(ID), TLI(tli) {
-    initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
-  }
+LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
+  initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
+}
  
  void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
    // We split critical edges, so we change the CFG.  However, we do update
@@ -4810,13 +4784,14 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequiredID(LoopSimplifyID);
    AU.addRequired<IVUsers>();
    AU.addPreserved<IVUsers>();
+  AU.addRequired<TargetTransformInfo>();
  }
  
  bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
    bool Changed = false;
  
    // Run the main LSR transformation.
-  Changed |= LSRInstance(TLI, L, this).getChanged();
+  Changed |= LSRInstance(L, this).getChanged();
  
    // Remove any extra phis created by processing inner loops.
    Changed |= DeleteDeadPHIs(L->getHeader());
@@ -4826,8 +4801,10 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
  #ifndef NDEBUG
      Rewriter.setDebugType(DEBUG_TYPE);
  #endif
-    unsigned numFolded = Rewriter.
-      replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
+    unsigned numFolded =
+        Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
+                                     DeadInsts,
+                                     &getAnalysis<TargetTransformInfo>());
      if (numFolded) {
        Changed = true;
        DeleteTriviallyDeadInstructions(DeadInsts);
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll b/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll

deleted file mode 100644 (file)

index c650d8c..0000000
--- a/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll
+++ /dev/null
@@ -1,99 +0,0 @@
-; RUN: opt < %s -loop-reduce -S | grep "phi double" | count 1
-
-define void @foobar(i32 %n) nounwind {
-entry:
-       icmp eq i32 %n, 0               ; <i1>:0 [#uses=2]
-       br i1 %0, label %return, label %bb.nph
-
-bb.nph:                ; preds = %entry
-       %umax = select i1 %0, i32 1, i32 %n             ; <i32> [#uses=1]
-       br label %bb
-
-bb:            ; preds = %bb, %bb.nph
-       %i.03 = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ]           ; <i32> [#uses=3]
-       tail call void @bar( i32 %i.03 ) nounwind
-       uitofp i32 %i.03 to double              ; <double>:1 [#uses=1]
-       tail call void @foo( double %1 ) nounwind
-       %indvar.next = add i32 %i.03, 1         ; <i32> [#uses=2]
-       %exitcond = icmp eq i32 %indvar.next, %umax             ; <i1> [#uses=1]
-       br i1 %exitcond, label %return, label %bb
-
-return:                ; preds = %bb, %entry
-       ret void
-}
-
-; Unable to eliminate cast because the mantissa bits for double are not enough
-; to hold all of i64 IV bits.
-define void @foobar2(i64 %n) nounwind {
-entry:
-       icmp eq i64 %n, 0               ; <i1>:0 [#uses=2]
-       br i1 %0, label %return, label %bb.nph
-
-bb.nph:                ; preds = %entry
-       %umax = select i1 %0, i64 1, i64 %n             ; <i64> [#uses=1]
-       br label %bb
-
-bb:            ; preds = %bb, %bb.nph
-       %i.03 = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ]           ; <i64> [#uses=3]
-       trunc i64 %i.03 to i32          ; <i32>:1 [#uses=1]
-       tail call void @bar( i32 %1 ) nounwind
-       uitofp i64 %i.03 to double              ; <double>:2 [#uses=1]
-       tail call void @foo( double %2 ) nounwind
-       %indvar.next = add i64 %i.03, 1         ; <i64> [#uses=2]
-       %exitcond = icmp eq i64 %indvar.next, %umax             ; <i1> [#uses=1]
-       br i1 %exitcond, label %return, label %bb
-
-return:                ; preds = %bb, %entry
-       ret void
-}
-
-; Unable to eliminate cast due to potentional overflow.
-define void @foobar3() nounwind {
-entry:
-       tail call i32 (...)* @nn( ) nounwind            ; <i32>:0 [#uses=1]
-       icmp eq i32 %0, 0               ; <i1>:1 [#uses=1]
-       br i1 %1, label %return, label %bb
-
-bb:            ; preds = %bb, %entry
-       %i.03 = phi i32 [ 0, %entry ], [ %3, %bb ]              ; <i32> [#uses=3]
-       tail call void @bar( i32 %i.03 ) nounwind
-       uitofp i32 %i.03 to double              ; <double>:2 [#uses=1]
-       tail call void @foo( double %2 ) nounwind
-       add i32 %i.03, 1                ; <i32>:3 [#uses=2]
-       tail call i32 (...)* @nn( ) nounwind            ; <i32>:4 [#uses=1]
-       icmp ugt i32 %4, %3             ; <i1>:5 [#uses=1]
-       br i1 %5, label %bb, label %return
-
-return:                ; preds = %bb, %entry
-       ret void
-}
-
-; Unable to eliminate cast due to overflow.
-define void @foobar4() nounwind {
-entry:
-       br label %bb.nph
-
-bb.nph:                ; preds = %entry
-       br label %bb
-
-bb:            ; preds = %bb, %bb.nph
-       %i.03 = phi i8 [ 0, %bb.nph ], [ %indvar.next, %bb ]            ; <i32> [#uses=3]
-       %tmp2 = sext i8 %i.03 to i32            ; <i32>:0 [#uses=1]
-       tail call void @bar( i32 %tmp2 ) nounwind
-       %tmp3 = uitofp i8 %i.03 to double               ; <double>:1 [#uses=1]
-       tail call void @foo( double %tmp3 ) nounwind
-       %indvar.next = add i8 %i.03, 1          ; <i32> [#uses=2]
-        %tmp = sext i8 %indvar.next to i32
-       %exitcond = icmp eq i32 %tmp, 32767             ; <i1> [#uses=1]
-       br i1 %exitcond, label %return, label %bb
-
-return:                ; preds = %bb, %entry
-       ret void
-}
-
-declare void @bar(i32)
-
-declare void @foo(double)
-
-declare i32 @nn(...)
-
diff --git a/test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll b/test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll

deleted file mode 100644 (file)

index 5d9ed64..0000000
--- a/test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: opt < %s -loop-reduce -S | FileCheck %s
-;
-; Test LSR's OptimizeShadowIV. Handle a floating-point IV with a
-; nonzero initial value.
-; rdar://9786536
-
-; First, make sure LSR doesn't crash on an empty IVUsers list.
-; CHECK: @dummyIV
-; CHECK-NOT: phi
-; CHECK-NOT: sitofp
-; CHECK: br
-define void @dummyIV() nounwind {
-entry:
-  br label %loop
-
-loop:
-  %i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
-  %conv = sitofp i32 %i.01 to double
-  %inc = add nsw i32 %i.01, 1
-  br i1 undef, label %loop, label %for.end
-
-for.end:
-  unreachable
-}
-
-; Now check that the computed double constant is correct.
-; CHECK: @doubleIV
-; CHECK: phi double [ -3.900000e+01, %entry ]
-; CHECK: br
-define void @doubleIV() nounwind {
-entry:
-  br label %loop
-
-loop:
-  %i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
-  %conv = sitofp i32 %i.01 to double
-  %div = fdiv double %conv, 4.000000e+01
-  %inc = add nsw i32 %i.01, 1
-  br i1 undef, label %loop, label %for.end
-
-for.end:
-  unreachable
-}
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll

index 0aa278762072a54afdf82b2869243d026f646e2a..53da46271627c5ec064543fda5cba9bad9aa2483 100644 (file)
--- a/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
+++ b/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
@@ -5,16 +5,17 @@
  ; PR13361: LSR + SCEV "hangs" on reasonably sized test with sequence of loops
  ;
  ; Without limits on CollectSubexpr, we have thousands of formulae for
-; the use that crosses loops. With limits we have five.
+; the use that crosses loops. With limits we have six.
  ; CHECK: LSR on loop %bb221:
  ; CHECK: After generating reuse formulae:
  ; CHECK: LSR is examining the following uses:
  ; CHECK: LSR Use: Kind=Special
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
-; CHECK: {{.*reg\(\{\{\{\{\{\{\{\{\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
+; CHECK: {{.*reg\(\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{.*\{}}
  ; CHECK-NOT:reg
  ; CHECK: Filtering for use
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll b/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll

new file mode 100644 (file)

index 0000000..9a7f486
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll
@@ -0,0 +1,99 @@
+; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | grep "phi double" | count 1
+
+define void @foobar(i32 %n) nounwind {
+entry:
+       icmp eq i32 %n, 0               ; <i1>:0 [#uses=2]
+       br i1 %0, label %return, label %bb.nph
+
+bb.nph:                ; preds = %entry
+       %umax = select i1 %0, i32 1, i32 %n             ; <i32> [#uses=1]
+       br label %bb
+
+bb:            ; preds = %bb, %bb.nph
+       %i.03 = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ]           ; <i32> [#uses=3]
+       tail call void @bar( i32 %i.03 ) nounwind
+       uitofp i32 %i.03 to double              ; <double>:1 [#uses=1]
+       tail call void @foo( double %1 ) nounwind
+       %indvar.next = add i32 %i.03, 1         ; <i32> [#uses=2]
+       %exitcond = icmp eq i32 %indvar.next, %umax             ; <i1> [#uses=1]
+       br i1 %exitcond, label %return, label %bb
+
+return:                ; preds = %bb, %entry
+       ret void
+}
+
+; Unable to eliminate cast because the mantissa bits for double are not enough
+; to hold all of i64 IV bits.
+define void @foobar2(i64 %n) nounwind {
+entry:
+       icmp eq i64 %n, 0               ; <i1>:0 [#uses=2]
+       br i1 %0, label %return, label %bb.nph
+
+bb.nph:                ; preds = %entry
+       %umax = select i1 %0, i64 1, i64 %n             ; <i64> [#uses=1]
+       br label %bb
+
+bb:            ; preds = %bb, %bb.nph
+       %i.03 = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ]           ; <i64> [#uses=3]
+       trunc i64 %i.03 to i32          ; <i32>:1 [#uses=1]
+       tail call void @bar( i32 %1 ) nounwind
+       uitofp i64 %i.03 to double              ; <double>:2 [#uses=1]
+       tail call void @foo( double %2 ) nounwind
+       %indvar.next = add i64 %i.03, 1         ; <i64> [#uses=2]
+       %exitcond = icmp eq i64 %indvar.next, %umax             ; <i1> [#uses=1]
+       br i1 %exitcond, label %return, label %bb
+
+return:                ; preds = %bb, %entry
+       ret void
+}
+
+; Unable to eliminate cast due to potential overflow.
+define void @foobar3() nounwind {
+entry:
+       tail call i32 (...)* @nn( ) nounwind            ; <i32>:0 [#uses=1]
+       icmp eq i32 %0, 0               ; <i1>:1 [#uses=1]
+       br i1 %1, label %return, label %bb
+
+bb:            ; preds = %bb, %entry
+       %i.03 = phi i32 [ 0, %entry ], [ %3, %bb ]              ; <i32> [#uses=3]
+       tail call void @bar( i32 %i.03 ) nounwind
+       uitofp i32 %i.03 to double              ; <double>:2 [#uses=1]
+       tail call void @foo( double %2 ) nounwind
+       add i32 %i.03, 1                ; <i32>:3 [#uses=2]
+       tail call i32 (...)* @nn( ) nounwind            ; <i32>:4 [#uses=1]
+       icmp ugt i32 %4, %3             ; <i1>:5 [#uses=1]
+       br i1 %5, label %bb, label %return
+
+return:                ; preds = %bb, %entry
+       ret void
+}
+
+; Unable to eliminate cast due to overflow.
+define void @foobar4() nounwind {
+entry:
+       br label %bb.nph
+
+bb.nph:                ; preds = %entry
+       br label %bb
+
+bb:            ; preds = %bb, %bb.nph
+       %i.03 = phi i8 [ 0, %bb.nph ], [ %indvar.next, %bb ]            ; <i32> [#uses=3]
+       %tmp2 = sext i8 %i.03 to i32            ; <i32>:0 [#uses=1]
+       tail call void @bar( i32 %tmp2 ) nounwind
+       %tmp3 = uitofp i8 %i.03 to double               ; <double>:1 [#uses=1]
+       tail call void @foo( double %tmp3 ) nounwind
+       %indvar.next = add i8 %i.03, 1          ; <i32> [#uses=2]
+        %tmp = sext i8 %indvar.next to i32
+       %exitcond = icmp eq i32 %tmp, 32767             ; <i1> [#uses=1]
+       br i1 %exitcond, label %return, label %bb
+
+return:                ; preds = %bb, %entry
+       ret void
+}
+
+declare void @bar(i32)
+
+declare void @foo(double)
+
+declare i32 @nn(...)
+
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll

new file mode 100644 (file)

index 0000000..a932b47
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -loop-reduce -S -mtriple=x86_64-unknown-unknown | FileCheck %s
+;
+; Test LSR's OptimizeShadowIV. Handle a floating-point IV with a
+; nonzero initial value.
+; rdar://9786536
+
+; First, make sure LSR doesn't crash on an empty IVUsers list.
+; CHECK: @dummyIV
+; CHECK-NOT: phi
+; CHECK-NOT: sitofp
+; CHECK: br
+define void @dummyIV() nounwind {
+entry:
+  br label %loop
+
+loop:
+  %i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
+  %conv = sitofp i32 %i.01 to double
+  %inc = add nsw i32 %i.01, 1
+  br i1 undef, label %loop, label %for.end
+
+for.end:
+  unreachable
+}
+
+; Now check that the computed double constant is correct.
+; CHECK: @doubleIV
+; CHECK: phi double [ -3.900000e+01, %entry ]
+; CHECK: br
+define void @doubleIV() nounwind {
+entry:
+  br label %loop
+
+loop:
+  %i.01 = phi i32 [ -39, %entry ], [ %inc, %loop ]
+  %conv = sitofp i32 %i.01 to double
+  %div = fdiv double %conv, 4.000000e+01
+  %inc = add nsw i32 %i.01, 1
+  br i1 undef, label %loop, label %for.end
+
+for.end:
+  unreachable
+}
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll

index 96904c66e640a4ed7b7d658951e186cbd5ce3916..9e02d92a6f4b7a5df0d56743b81fa4abd463381e 100644 (file)
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -4,12 +4,12 @@
  ; LSR should properly handle the post-inc offset when folding the
  ; non-IV operand of an icmp into the IV.
  
-; CHECK:   %4 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK:   %5 = lshr i64 %4, 1
-; CHECK:   %6 = mul i64 %5, 2
+; CHECK:   %3 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK:   %4 = lshr i64 %3, 1
+; CHECK:   %5 = mul i64 %4, 2
  ; CHECK:   br label %for.body
  ; CHECK: for.body:
-; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %6, %for.body.lr.ph ]
+; CHECK:   %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %5, %for.body.lr.ph ]
  ; CHECK:   %lsr.iv.next = add i64 %lsr.iv2, -2
  ; CHECK:   %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16*
  ; CHECK:   %cmp27 = icmp eq i16* %lsr.iv.next3, null
author	Chandler Carruth <chandlerc@gmail.com>
	Mon, 7 Jan 2013 14:41:08 +0000 (14:41 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Mon, 7 Jan 2013 14:41:08 +0000 (14:41 +0000)
include/llvm/Analysis/ScalarEvolutionExpander.h		patch \| blob \| history
include/llvm/Transforms/Scalar.h		patch \| blob \| history
lib/Analysis/ScalarEvolutionExpander.cpp		patch \| blob \| history
lib/Analysis/TargetTransformInfo.cpp		patch \| blob \| history
lib/CodeGen/Passes.cpp		patch \| blob \| history
lib/Transforms/Scalar/LoopStrengthReduce.cpp		patch \| blob \| history
test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll	[deleted file]	patch \| blob \| history
test/Transforms/LoopStrengthReduce/2011-07-20-DoubleIV.ll	[deleted file]	patch \| blob \| history
test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll		patch \| blob \| history
test/Transforms/LoopStrengthReduce/X86/2008-08-14-ShadowIV.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/LoopStrengthReduce/X86/2011-07-20-DoubleIV.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll		patch \| blob \| history