LoopVectorize: Simplify code. No functionality change.

[oota-llvm.git] / lib / Transforms / Vectorize / LoopVectorize.cpp
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index e0d8e939df1d1035e41af332815dd750b9c656ec..293c6262ddb40662140acb8578a6a3f0714663d1 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -55,7 +55,9 @@
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionTracker.h"
  #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/LoopIterator.h"
  #include "llvm/Analysis/LoopPass.h"
@@ -108,8 +110,8 @@ VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
                      cl::desc("Sets the SIMD width. Zero is autoselect."));
  
  static cl::opt<unsigned>
-VectorizationUnroll("force-vector-unroll", cl::init(0), cl::Hidden,
-                    cl::desc("Sets the vectorization unroll count. "
+VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden,
+                    cl::desc("Sets the vectorization interleave count. "
                               "Zero is autoselect."));
  
  static cl::opt<bool>
@@ -157,17 +159,17 @@ static cl::opt<unsigned> ForceTargetNumVectorRegs(
      "force-target-num-vector-regs", cl::init(0), cl::Hidden,
      cl::desc("A flag that overrides the target's number of vector registers."));
  
-/// Maximum vectorization unroll count.
-static const unsigned MaxUnrollFactor = 16;
+/// Maximum vectorization interleave count.
+static const unsigned MaxInterleaveFactor = 16;
  
-static cl::opt<unsigned> ForceTargetMaxScalarUnrollFactor(
-    "force-target-max-scalar-unroll", cl::init(0), cl::Hidden,
-    cl::desc("A flag that overrides the target's max unroll factor for scalar "
-             "loops."));
+static cl::opt<unsigned> ForceTargetMaxScalarInterleaveFactor(
+    "force-target-max-scalar-interleave", cl::init(0), cl::Hidden,
+    cl::desc("A flag that overrides the target's max interleave factor for "
+             "scalar loops."));
  
-static cl::opt<unsigned> ForceTargetMaxVectorUnrollFactor(
-    "force-target-max-vector-unroll", cl::init(0), cl::Hidden,
-    cl::desc("A flag that overrides the target's max unroll factor for "
+static cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor(
+    "force-target-max-vector-interleave", cl::init(0), cl::Hidden,
+    cl::desc("A flag that overrides the target's max interleave factor for "
               "vectorized loops."));
  
  static cl::opt<unsigned> ForceTargetInstructionCost(
@@ -204,6 +206,11 @@ static cl::opt<bool> EnableCondStoresVectorization(
      "enable-cond-stores-vec", cl::init(false), cl::Hidden,
      cl::desc("Enable if predication of stores during vectorization."));
  
+static cl::opt<unsigned> MaxNestedScalarReductionUF(
+    "max-nested-scalar-reduction-unroll", cl::init(2), cl::Hidden,
+    cl::desc("The maximum unroll factor to use when unrolling a scalar "
+             "reduction in a nested loop."));
+
  namespace {
  
  // Forward declarations.
@@ -783,7 +790,7 @@ private:
    /// Return true if all of the instructions in the block can be speculatively
    /// executed. \p SafePtrs is a list of addresses that are known to be legal
    /// and we know that we can read from them without segfault.
-  bool blockCanBePredicated(BasicBlock *BB, SmallPtrSet<Value *, 8>& SafePtrs);
+  bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
  
    /// Returns True, if 'Phi' is the kind of reduction variable for type
    /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
@@ -879,8 +886,12 @@ public:
                               LoopVectorizationLegality *Legal,
                               const TargetTransformInfo &TTI,
                               const DataLayout *DL, const TargetLibraryInfo *TLI,
-                             const Function *F, const LoopVectorizeHints *Hints)
-      : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI), TheFunction(F), Hints(Hints) {}
+                             AssumptionTracker *AT, const Function *F,
+                             const LoopVectorizeHints *Hints)
+      : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI),
+        TheFunction(F), Hints(Hints) {
+    CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+  }
  
    /// Information about vectorization costs
    struct VectorizationFactor {
@@ -949,6 +960,9 @@ private:
                                     *TheFunction, DL, Message.str());
    }
  
+  /// Values used only by @llvm.assume calls.
+  SmallPtrSet<const Value *, 32> EphValues;
+
    /// The loop that we evaluate.
    Loop *TheLoop;
    /// Scev analysis.
@@ -998,7 +1012,7 @@ class LoopVectorizeHints {
        case HK_WIDTH:
          return isPowerOf2_32(Val) && Val <= MaxVectorWidth;
        case HK_UNROLL:
-        return isPowerOf2_32(Val) && Val <= MaxUnrollFactor;
+        return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
        case HK_FORCE:
          return (Val <= 1);
        }
@@ -1008,12 +1022,10 @@ class LoopVectorizeHints {
  
    /// Vectorization width.
    Hint Width;
-  /// Vectorization unroll factor.
-  Hint Unroll;
+  /// Vectorization interleave factor.
+  Hint Interleave;
    /// Vectorization forced
    Hint Force;
-  /// Array to help iterating through all hints.
-  Hint *Hints[3] = { &Width, &Unroll, &Force };
  
    /// Return the loop metadata prefix.
    static StringRef Prefix() { return "llvm.loop."; }
@@ -1025,26 +1037,27 @@ public:
      FK_Enabled = 1,    ///< Forcing enabled.
    };
  
-  LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
+  LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
        : Width("vectorize.width", VectorizationFactor, HK_WIDTH),
-        Unroll("interleave.count", DisableUnrolling, HK_UNROLL),
+        Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
          Force("vectorize.enable", FK_Undefined, HK_FORCE),
          TheLoop(L) {
      // Populate values with existing loop metadata.
      getHintsFromMetadata();
  
-    // force-vector-unroll overrides DisableUnrolling.
-    if (VectorizationUnroll.getNumOccurrences() > 0)
-      Unroll.Value = VectorizationUnroll;
+    // force-vector-interleave overrides DisableInterleaving.
+    if (VectorizationInterleave.getNumOccurrences() > 0)
+      Interleave.Value = VectorizationInterleave;
  
-    DEBUG(if (DisableUnrolling && Unroll.Value == 1) dbgs()
-          << "LV: Unrolling disabled by the pass manager\n");
+    DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
+          << "LV: Interleaving disabled by the pass manager\n");
    }
  
    /// Mark the loop L as already vectorized by setting the width to 1.
    void setAlreadyVectorized() {
-    Width.Value = Unroll.Value = 1;
-    writeHintsToMetadata({ Width, Unroll });
+    Width.Value = Interleave.Value = 1;
+    Hint Hints[] = {Width, Interleave};
+    writeHintsToMetadata(Hints);
    }
  
    /// Dumps all the hint information.
@@ -1058,8 +1071,8 @@ public:
          R << " (Force=true";
          if (Width.Value != 0)
            R << ", Vector Width=" << Width.Value;
-        if (Unroll.Value != 0)
-          R << ", Interleave Count=" << Unroll.Value;
+        if (Interleave.Value != 0)
+          R << ", Interleave Count=" << Interleave.Value;
          R << ")";
        }
      }
@@ -1068,7 +1081,7 @@ public:
    }
  
    unsigned getWidth() const { return Width.Value; }
-  unsigned getUnroll() const { return Unroll.Value; }
+  unsigned getInterleave() const { return Interleave.Value; }
    enum ForceKind getForce() const { return (ForceKind)Force.Value; }
  
  private:
@@ -1119,6 +1132,7 @@ private:
      if (!C) return;
      unsigned Val = C->getZExtValue();
  
+    Hint *Hints[] = {&Width, &Interleave, &Force};
      for (auto H : Hints) {
        if (Name == H->Name) {
          if (H->validate(Val))
@@ -1133,14 +1147,13 @@ private:
    /// Create a new hint from name / value pair.
    MDNode *createHintMetadata(StringRef Name, unsigned V) const {
      LLVMContext &Context = TheLoop->getHeader()->getContext();
-    SmallVector<Value*, 2> Vals;
-    Vals.push_back(MDString::get(Context, Name));
-    Vals.push_back(ConstantInt::get(Type::getInt32Ty(Context), V));
+    Value *Vals[] = {MDString::get(Context, Name),
+                     ConstantInt::get(Type::getInt32Ty(Context), V)};
      return MDNode::get(Context, Vals);
    }
  
    /// Matches metadata with hint name.
-  bool matchesHintMetadataName(MDNode *Node, std::vector<Hint> &HintTypes) {
+  bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
      MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
      if (!Name)
        return false;
@@ -1152,7 +1165,7 @@ private:
    }
  
    /// Sets current hints into loop metadata, keeping other values intact.
-  void writeHintsToMetadata(std::vector<Hint> HintTypes) {
+  void writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
      if (HintTypes.size() == 0)
        return;
  
@@ -1200,7 +1213,7 @@ static void emitMissedWarning(Function *F, Loop *L,
        emitLoopVectorizeWarning(
            F->getContext(), *F, L->getStartLoc(),
            "failed explicitly specified loop vectorization");
-    else if (LH.getUnroll() != 1)
+    else if (LH.getInterleave() != 1)
        emitLoopInterleaveWarning(
            F->getContext(), *F, L->getStartLoc(),
            "failed explicitly specified loop interleaving");
@@ -1235,6 +1248,7 @@ struct LoopVectorize : public FunctionPass {
    BlockFrequencyInfo *BFI;
    TargetLibraryInfo *TLI;
    AliasAnalysis *AA;
+  AssumptionTracker *AT;
    bool DisableUnrolling;
    bool AlwaysVectorize;
  
@@ -1250,6 +1264,7 @@ struct LoopVectorize : public FunctionPass {
      BFI = &getAnalysis<BlockFrequencyInfo>();
      TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
      AA = &getAnalysis<AliasAnalysis>();
+    AT = &getAnalysis<AssumptionTracker>();
  
      // Compute some weights outside of the loop over the loops. Compute this
      // using a BranchProbability to re-use its scaling math.
@@ -1306,7 +1321,7 @@ struct LoopVectorize : public FunctionPass {
                           : (Hints.getForce() == LoopVectorizeHints::FK_Enabled
                                  ? "enabled"
                                  : "?")) << " width=" << Hints.getWidth()
-                 << " unroll=" << Hints.getUnroll() << "\n");
+                 << " unroll=" << Hints.getInterleave() << "\n");
  
      // Function containing loop
      Function *F = L->getHeader()->getParent();
@@ -1333,7 +1348,7 @@ struct LoopVectorize : public FunctionPass {
        return false;
      }
  
-    if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) {
+    if (Hints.getWidth() == 1 && Hints.getInterleave() == 1) {
        DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
        emitOptimizationRemarkAnalysis(
            F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
@@ -1344,8 +1359,7 @@ struct LoopVectorize : public FunctionPass {
  
      // Check the loop for a trip count threshold:
      // do not vectorize loops with a tiny trip count.
-    BasicBlock *Latch = L->getLoopLatch();
-    const unsigned TC = SE->getSmallConstantTripCount(L, Latch);
+    const unsigned TC = SE->getSmallConstantTripCount(L);
      if (TC > 0u && TC < TinyTripCountVectorThreshold) {
        DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
                     << "This loop is not worth vectorizing.");
@@ -1369,7 +1383,8 @@ struct LoopVectorize : public FunctionPass {
      }
  
      // Use the cost model.
-    LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, F, &Hints);
+    LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, AT, F,
+                                  &Hints);
  
      // Check the function attributes to find out if this function should be
      // optimized for size.
@@ -1456,6 +1471,7 @@ struct LoopVectorize : public FunctionPass {
    }
  
    void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AssumptionTracker>();
      AU.addRequiredID(LoopSimplifyID);
      AU.addRequiredID(LCSSAID);
      AU.addRequired<BlockFrequencyInfo>();
@@ -3232,19 +3248,9 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
        for (unsigned Part = 0; Part < UF; ++Part) {
          Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
  
-        // Update the NSW, NUW and Exact flags. Notice: V can be an Undef.
-        BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V);
-        if (VecOp && isa<OverflowingBinaryOperator>(BinOp)) {
-          VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
-          VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
-        }
-        if (VecOp && isa<PossiblyExactOperator>(VecOp))
-          VecOp->setIsExact(BinOp->isExact());
-
-        // Copy the fast-math flags.
-        if (VecOp && isa<FPMathOperator>(V))
-          VecOp->setFastMathFlags(it->getFastMathFlags());
-
+        if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
+          VecOp->copyIRFlags(BinOp);
+        
          Entry[Part] = V;
        }
  
@@ -3356,6 +3362,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
        Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
        assert(ID && "Not an intrinsic call!");
        switch (ID) {
+      case Intrinsic::assume:
        case Intrinsic::lifetime_end:
        case Intrinsic::lifetime_start:
          scalarizeInstruction(it);
@@ -3597,7 +3604,7 @@ static Type* getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
  /// \brief Check that the instruction has outside loop users and is not an
  /// identified reduction variable.
  static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
-                               SmallPtrSet<Value *, 4> &Reductions) {
+                               SmallPtrSetImpl<Value *> &Reductions) {
    // Reduction instructions are allowed to have exit users. All other
    // instructions must not have external users.
    if (!Reductions.count(Inst))
@@ -4622,7 +4629,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
  
    // Bail out early if passed-in parameters make vectorization not feasible.
    unsigned ForcedFactor = VectorizationFactor ? VectorizationFactor : 1;
-  unsigned ForcedUnroll = VectorizationUnroll ? VectorizationUnroll : 1;
+  unsigned ForcedUnroll = VectorizationInterleave ? VectorizationInterleave : 1;
  
    // The distance must be bigger than the size needed for a vectorized version
    // of the operation and the size of the vectorized operation must not be
@@ -4940,7 +4947,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
  }
  
  static bool hasMultipleUsesOf(Instruction *I,
-                              SmallPtrSet<Instruction *, 8> &Insts) {
+                              SmallPtrSetImpl<Instruction *> &Insts) {
    unsigned NumUses = 0;
    for(User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) {
      if (Insts.count(dyn_cast<Instruction>(*Use)))
@@ -4952,7 +4959,7 @@ static bool hasMultipleUsesOf(Instruction *I,
    return false;
  }
  
-static bool areAllUsesIn(Instruction *I, SmallPtrSet<Instruction *, 8> &Set) {
+static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set) {
    for(User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
      if (!Set.count(dyn_cast<Instruction>(*Use)))
        return false;
@@ -5285,7 +5292,7 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB)  {
  }
  
  bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
-                                            SmallPtrSet<Value *, 8>& SafePtrs) {
+                                           SmallPtrSetImpl<Value *> &SafePtrs) {
    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
      // We might be able to hoist the load.
      if (it->mayReadFromMemory()) {
@@ -5346,7 +5353,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
    }
  
    // Find the trip count.
-  unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
+  unsigned TC = SE->getSmallConstantTripCount(TheLoop);
    DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
  
    unsigned WidestType = getWidestType();
@@ -5452,6 +5459,10 @@ unsigned LoopVectorizationCostModel::getWidestType() {
      for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
        Type *T = it->getType();
  
+      // Ignore ephemeral values.
+      if (EphValues.count(it))
+        continue;
+
        // Only examine Loads, Stores and PHINodes.
        if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
          continue;
@@ -5487,23 +5498,23 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
    // -- The unroll heuristics --
    // We unroll the loop in order to expose ILP and reduce the loop overhead.
    // There are many micro-architectural considerations that we can't predict
-  // at this level. For example frontend pressure (on decode or fetch) due to
+  // at this level. For example, frontend pressure (on decode or fetch) due to
    // code size, or the number and capabilities of the execution ports.
    //
    // We use the following heuristics to select the unroll factor:
-  // 1. If the code has reductions the we unroll in order to break the cross
+  // 1. If the code has reductions, then we unroll in order to break the cross
    // iteration dependency.
-  // 2. If the loop is really small then we unroll in order to reduce the loop
+  // 2. If the loop is really small, then we unroll in order to reduce the loop
    // overhead.
    // 3. We don't unroll if we think that we will spill registers to memory due
    // to the increased register pressure.
  
    // Use the user preference, unless 'auto' is selected.
-  int UserUF = Hints->getUnroll();
+  int UserUF = Hints->getInterleave();
    if (UserUF != 0)
      return UserUF;
  
-  // When we optimize for size we don't unroll.
+  // When we optimize for size, we don't unroll.
    if (OptForSize)
      return 1;
  
@@ -5512,8 +5523,7 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
      return 1;
  
    // Do not unroll loops with a relatively small trip count.
-  unsigned TC = SE->getSmallConstantTripCount(TheLoop,
-                                              TheLoop->getLoopLatch());
+  unsigned TC = SE->getSmallConstantTripCount(TheLoop);
    if (TC > 1 && TC < TinyTripCountUnrollThreshold)
      return 1;
  
@@ -5552,15 +5562,15 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
                         std::max(1U, (R.MaxLocalUsers - 1)));
  
    // Clamp the unroll factor ranges to reasonable factors.
-  unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
+  unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor();
  
    // Check if the user has overridden the unroll max.
    if (VF == 1) {
-    if (ForceTargetMaxScalarUnrollFactor.getNumOccurrences() > 0)
-      MaxUnrollSize = ForceTargetMaxScalarUnrollFactor;
+    if (ForceTargetMaxScalarInterleaveFactor.getNumOccurrences() > 0)
+      MaxInterleaveSize = ForceTargetMaxScalarInterleaveFactor;
    } else {
-    if (ForceTargetMaxVectorUnrollFactor.getNumOccurrences() > 0)
-      MaxUnrollSize = ForceTargetMaxVectorUnrollFactor;
+    if (ForceTargetMaxVectorInterleaveFactor.getNumOccurrences() > 0)
+      MaxInterleaveSize = ForceTargetMaxVectorInterleaveFactor;
    }
  
    // If we did not calculate the cost for VF (because the user selected the VF)
@@ -5570,8 +5580,8 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
  
    // Clamp the calculated UF to be between the 1 and the max unroll factor
    // that the target allows.
-  if (UF > MaxUnrollSize)
-    UF = MaxUnrollSize;
+  if (UF > MaxInterleaveSize)
+    UF = MaxInterleaveSize;
    else if (UF < 1)
      UF = 1;
  
@@ -5602,6 +5612,18 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
      unsigned StoresUF = UF / (Legal->NumStores ? Legal->NumStores : 1);
      unsigned LoadsUF = UF /  (Legal->NumLoads ? Legal->NumLoads : 1);
  
+    // If we have a scalar reduction (vector reductions are already dealt with
+    // by this point), we can increase the critical path length if the loop
+    // we're unrolling is inside another loop. Limit the factor (to 2 by
+    // default) so the critical path only grows by one reduction operation.
+    if (Legal->getReductionVars()->size() &&
+        TheLoop->getLoopDepth() > 1) {
+      unsigned F = static_cast<unsigned>(MaxNestedScalarReductionUF);
+      SmallUF = std::min(SmallUF, F);
+      StoresUF = std::min(StoresUF, F);
+      LoadsUF = std::min(LoadsUF, F);
+    }
+
      if (EnableLoadStoreRuntimeUnroll && std::max(StoresUF, LoadsUF) > SmallUF) {
        DEBUG(dbgs() << "LV: Unrolling to saturate store or load ports.\n");
        return std::max(StoresUF, LoadsUF);
@@ -5703,6 +5725,10 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
      // Ignore instructions that are never used within the loop.
      if (!Ends.count(I)) continue;
  
+    // Ignore ephemeral values.
+    if (EphValues.count(I))
+      continue;
+
      // Remove all of the instructions that end at this location.
      InstrList &List = TransposeEnds[i];
      for (unsigned int j=0, e = List.size(); j < e; ++j)
@@ -5743,6 +5769,10 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
        if (isa<DbgInfoIntrinsic>(it))
          continue;
  
+      // Ignore ephemeral values.
+      if (EphValues.count(it))
+        continue;
+
        unsigned C = getInstructionCost(it, VF);
  
        // Check if we should override the cost.
@@ -5876,18 +5906,31 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
        TargetTransformInfo::OK_AnyValue;
      TargetTransformInfo::OperandValueKind Op2VK =
        TargetTransformInfo::OK_AnyValue;
+    TargetTransformInfo::OperandValueProperties Op1VP =
+        TargetTransformInfo::OP_None;
+    TargetTransformInfo::OperandValueProperties Op2VP =
+        TargetTransformInfo::OP_None;
      Value *Op2 = I->getOperand(1);
  
      // Check for a splat of a constant or for a non uniform vector of constants.
-    if (isa<ConstantInt>(Op2))
+    if (isa<ConstantInt>(Op2)) {
+      ConstantInt *CInt = cast<ConstantInt>(Op2);
+      if (CInt && CInt->getValue().isPowerOf2())
+        Op2VP = TargetTransformInfo::OP_PowerOf2;
        Op2VK = TargetTransformInfo::OK_UniformConstantValue;
-    else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) {
+    } else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) {
        Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
-      if (cast<Constant>(Op2)->getSplatValue() != nullptr)
+      Constant *SplatValue = cast<Constant>(Op2)->getSplatValue();
+      if (SplatValue) {
+        ConstantInt *CInt = dyn_cast<ConstantInt>(SplatValue);
+        if (CInt && CInt->getValue().isPowerOf2())
+          Op2VP = TargetTransformInfo::OP_PowerOf2;
          Op2VK = TargetTransformInfo::OK_UniformConstantValue;
+      }
      }
  
-    return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK);
+    return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK,
+                                      Op1VP, Op2VP);
    }
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(I);
@@ -6030,6 +6073,7 @@ static const char lv_name[] = "Loop Vectorization";
  INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
  INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
  INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
  INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)