[opaque pointer type] API migration for GEP constant factories

[oota-llvm.git] / lib / Transforms / Scalar / LoopUnrollPass.cpp
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp

index d1daaa684ad1954d5a08652e77246c72d2c27651..600cbde7b4219a06b3422e14e495c63584e65195 100644 (file)
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -16,6 +16,7 @@
  #include "llvm/ADT/SetVector.h"
  #include "llvm/Analysis/AssumptionCache.h"
  #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/Analysis/LoopPass.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -23,14 +24,13 @@
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/DiagnosticInfo.h"
  #include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstVisitor.h"
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/Metadata.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/Analysis/InstructionSimplify.h"
  #include <climits>
  
  using namespace llvm;
@@ -42,7 +42,7 @@ UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
    cl::desc("The cut-off point for automatic loop unrolling"));
  
  static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
-    "unroll-max-iteration-count-to-analyze", cl::init(1000), cl::Hidden,
+    "unroll-max-iteration-count-to-analyze", cl::init(0), cl::Hidden,
      cl::desc("Don't allow loop unrolling to simulate more than this number of"
               "iterations when checking full unroll profitability"));
  
@@ -213,9 +213,8 @@ namespace {
  
        PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
        if (!UserThreshold &&
-          L->getHeader()->getParent()->getAttributes().
-              hasAttribute(AttributeSet::FunctionIndex,
-                           Attribute::OptimizeForSize)) {
+          L->getHeader()->getParent()->hasFnAttribute(
+              Attribute::OptimizeForSize)) {
          Threshold = UP.OptSizeThreshold;
          PartialThreshold = UP.PartialOptSizeThreshold;
        }
@@ -260,6 +259,7 @@ static bool isLoadFromConstantInitializer(Value *V) {
    return false;
  }
  
+namespace {
  struct FindConstantPointers {
    bool LoadCanBeConstantFolded;
    bool IndexIsConstant;
@@ -357,11 +357,12 @@ class UnrollAnalyzer : public InstVisitor<UnrollAnalyzer, bool> {
        if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
          RHS = SimpleRHS;
      Value *SimpleV = nullptr;
+    const DataLayout &DL = I.getModule()->getDataLayout();
      if (auto FI = dyn_cast<FPMathOperator>(&I))
        SimpleV =
-          SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags());
+          SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
      else
-      SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS);
+      SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
  
      if (SimpleV && CountedInstructions.insert(&I).second)
        NumberOfOptimizedInstructions += TTI.getUserCost(&I);
@@ -453,10 +454,17 @@ public:
  
    // Given a list of loads that could be constant-folded (LoadBaseAddresses),
    // estimate number of optimized instructions after substituting the concrete
-  // values for the given Iteration.
-  // Fill in SimplifiedValues map for future use in DCE-estimation.
-  unsigned estimateNumberOfSimplifiedInstructions(unsigned Iteration) {
-    SmallVector<Instruction *, 8> Worklist;
+  // values for the given Iteration. Also track how many instructions become
+  // dead through this process.
+  unsigned estimateNumberOfOptimizedInstructions(unsigned Iteration) {
+    // We keep a set vector for the worklist so that we don't wast space in the
+    // worklist queuing up the same instruction repeatedly. This can happen due
+    // to multiple operands being the same instruction or due to the same
+    // instruction being an operand of lots of things that end up dead or
+    // simplified.
+    SmallSetVector<Instruction *, 8> Worklist;
+
+    // Clear the simplified values and counts for this iteration.
      SimplifiedValues.clear();
      CountedInstructions.clear();
      NumberOfOptimizedInstructions = 0;
@@ -468,14 +476,8 @@ public:
        if (CountedInstructions.insert(LI).second)
          NumberOfOptimizedInstructions += TTI.getUserCost(LI);
  
-      for (User *U : LI->users()) {
-        Instruction *UI = dyn_cast<Instruction>(U);
-        if (!UI)
-          continue;
-        if (!L->contains(UI))
-          continue;
-        Worklist.push_back(UI);
-      }
+      for (User *U : LI->users())
+        Worklist.insert(cast<Instruction>(U));
      }
  
      // And then we try to simplify every user of every instruction from the
@@ -483,31 +485,17 @@ public:
      // its users as well.
      while (!Worklist.empty()) {
        Instruction *I = Worklist.pop_back_val();
+      if (!L->contains(I))
+        continue;
        if (!visit(I))
          continue;
-      for (User *U : I->users()) {
-        Instruction *UI = dyn_cast<Instruction>(U);
-        if (!UI)
-          continue;
-        if (!L->contains(UI))
-          continue;
-        Worklist.push_back(UI);
-      }
+      for (User *U : I->users())
+        Worklist.insert(cast<Instruction>(U));
      }
-    return NumberOfOptimizedInstructions;
-  }
  
-  // Given a list of potentially simplifed instructions, estimate number of
-  // instructions that would become dead if we do perform the simplification.
-  unsigned estimateNumberOfDeadInstructions() {
-    NumberOfOptimizedInstructions = 0;
-
-    // We keep a set vector for the worklist so that we don't wast space in the
-    // worklist queuing up the same instruction repeatedly. This can happen due
-    // to multiple operands being the same instruction or due to the same
-    // instruction being an operand of lots of things that end up dead or
-    // simplified.
-    SmallSetVector<Instruction *, 8> Worklist;
+    // Now that we know the potentially simplifed instructions, estimate number
+    // of instructions that would become dead if we do perform the
+    // simplification.
  
      // The dead instructions are held in a separate set. This is used to
      // prevent us from re-examining instructions and make sure we only count
@@ -519,7 +507,8 @@ public:
      auto EnqueueOperands = [&](Instruction &I) {
        for (auto *Op : I.operand_values())
          if (auto *OpI = dyn_cast<Instruction>(Op))
-          Worklist.insert(OpI);
+          if (!OpI->use_empty())
+            Worklist.insert(OpI);
      };
  
      // Start by initializing worklist with simplified instructions.
@@ -541,17 +530,10 @@ public:
          continue;
        if (DeadInstructions.count(I))
          continue;
-      if (I->getNumUses() == 0)
-        continue;
-      bool AllUsersFolded = true;
-      for (User *U : I->users()) {
-        Instruction *UI = dyn_cast<Instruction>(U);
-        if (!SimplifiedValues[UI] && !DeadInstructions.count(UI)) {
-          AllUsersFolded = false;
-          break;
-        }
-      }
-      if (AllUsersFolded) {
+
+      if (std::all_of(I->user_begin(), I->user_end(), [&](User *U) {
+            return DeadInstructions.count(cast<Instruction>(U));
+          })) {
          NumberOfOptimizedInstructions += TTI.getUserCost(I);
          DeadInstructions.insert(I);
          EnqueueOperands(*I);
@@ -560,6 +542,7 @@ public:
      return NumberOfOptimizedInstructions;
    }
  };
+} // namespace
  
  // Complete loop unrolling can make some loads constant, and we need to know if
  // that would expose any further optimization opportunities.
@@ -583,11 +566,10 @@ approximateNumberOfOptimizedInstructions(const Loop *L, ScalarEvolution &SE,
    unsigned IterationsNumberForEstimate =
        std::min<unsigned>(UnrollMaxIterationsCountToAnalyze, TripCount);
    unsigned NumberOfOptimizedInstructions = 0;
-  for (unsigned i = 0; i < IterationsNumberForEstimate; ++i) {
+  for (unsigned i = 0; i < IterationsNumberForEstimate; ++i)
      NumberOfOptimizedInstructions +=
-        UA.estimateNumberOfSimplifiedInstructions(i);
-    NumberOfOptimizedInstructions += UA.estimateNumberOfDeadInstructions();
-  }
+        UA.estimateNumberOfOptimizedInstructions(i);
+
    NumberOfOptimizedInstructions *= TripCount / IterationsNumberForEstimate;
  
    return NumberOfOptimizedInstructions;
@@ -640,6 +622,11 @@ static bool HasUnrollDisablePragma(const Loop *L) {
    return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
  }
  
+// Returns true if the loop has an runtime unroll(disable) pragma.
+static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
+  return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
+}
+
  // If loop has an unroll_count pragma return the (necessarily
  // positive) value from the pragma.  Otherwise return 0.
  static unsigned UnrollCountPragmaValue(const Loop *L) {
@@ -828,6 +815,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
    // Reduce count based on the type of unrolling and the threshold values.
    unsigned OriginalCount = Count;
    bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime;
+  if (HasRuntimeUnrollDisablePragma(L)) {
+    AllowRuntime = false;
+  }
    if (Unrolling == Partial) {
      bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
      if (!AllowPartial && !CountSetExplicitly) {