#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include <climits>
using namespace llvm;
cl::desc("The cut-off point for automatic loop unrolling"));
+// Caps how many loop iterations are simulated when checking whether a full
+// unroll is profitable (see the option description below).
static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
- "unroll-max-iteration-count-to-analyze", cl::init(1000), cl::Hidden,
+ "unroll-max-iteration-count-to-analyze", cl::init(0), cl::Hidden,
- cl::desc("Don't allow loop unrolling to simulate more than this number of"
+ cl::desc("Don't allow loop unrolling to simulate more than this number of "
"iterations when checking full unroll profitability"));
PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
if (!UserThreshold &&
- L->getHeader()->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize)) {
+ L->getHeader()->getParent()->hasFnAttribute(
+ Attribute::OptimizeForSize)) {
Threshold = UP.OptSizeThreshold;
PartialThreshold = UP.PartialOptSizeThreshold;
}
return false;
}
+namespace {
struct FindConstantPointers {
bool LoadCanBeConstantFolded;
bool IndexIsConstant;
if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
RHS = SimpleRHS;
Value *SimpleV = nullptr;
+ const DataLayout &DL = I.getModule()->getDataLayout();
if (auto FI = dyn_cast<FPMathOperator>(&I))
SimpleV =
- SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags());
+ SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
else
- SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS);
+ SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
if (SimpleV && CountedInstructions.insert(&I).second)
NumberOfOptimizedInstructions += TTI.getUserCost(&I);
// Given a list of loads that could be constant-folded (LoadBaseAddresses),
// estimate number of optimized instructions after substituting the concrete
- // values for the given Iteration.
- // Fill in SimplifiedValues map for future use in DCE-estimation.
- unsigned estimateNumberOfSimplifiedInstructions(unsigned Iteration) {
+ // values for the given Iteration. Also track how many instructions become
+ // dead through this process.
+ unsigned estimateNumberOfOptimizedInstructions(unsigned Iteration) {
+ // We keep a set vector for the worklist so that we don't waste space in the
+ // worklist queuing up the same instruction repeatedly. This can happen due
+ // to multiple operands being the same instruction or due to the same
+ // instruction being an operand of lots of things that end up dead or
+ // simplified.
SmallSetVector<Instruction *, 8> Worklist;
+
+ // Clear the simplified values and counts for this iteration.
SimplifiedValues.clear();
CountedInstructions.clear();
NumberOfOptimizedInstructions = 0;
if (CountedInstructions.insert(LI).second)
NumberOfOptimizedInstructions += TTI.getUserCost(LI);
- for (User *U : LI->users()) {
- Instruction *UI = dyn_cast<Instruction>(U);
- if (!UI)
- continue;
- Worklist.insert(UI);
- }
+ for (User *U : LI->users())
+ Worklist.insert(cast<Instruction>(U));
}
// And then we try to simplify every user of every instruction from the
continue;
if (!visit(I))
continue;
- for (User *U : I->users()) {
- Instruction *UI = dyn_cast<Instruction>(U);
- if (!UI)
- continue;
- Worklist.insert(UI);
- }
+ for (User *U : I->users())
+ Worklist.insert(cast<Instruction>(U));
}
- return NumberOfOptimizedInstructions;
- }
-
- // Given a list of potentially simplifed instructions, estimate number of
- // instructions that would become dead if we do perform the simplification.
- unsigned estimateNumberOfDeadInstructions() {
- NumberOfOptimizedInstructions = 0;
- // We keep a set vector for the worklist so that we don't wast space in the
- // worklist queuing up the same instruction repeatedly. This can happen due
- // to multiple operands being the same instruction or due to the same
- // instruction being an operand of lots of things that end up dead or
- // simplified.
- SmallSetVector<Instruction *, 8> Worklist;
+ // Now that we know the potentially simplified instructions, estimate the
+ // number of instructions that would become dead if we do perform the
+ // simplification.
// The dead instructions are held in a separate set. This is used to
// prevent us from re-examining instructions and make sure we only count
return NumberOfOptimizedInstructions;
}
};
+} // namespace
// Complete loop unrolling can make some loads constant, and we need to know if
// that would expose any further optimization opportunities.
unsigned IterationsNumberForEstimate =
std::min<unsigned>(UnrollMaxIterationsCountToAnalyze, TripCount);
unsigned NumberOfOptimizedInstructions = 0;
- for (unsigned i = 0; i < IterationsNumberForEstimate; ++i) {
+ for (unsigned i = 0; i < IterationsNumberForEstimate; ++i)
NumberOfOptimizedInstructions +=
- UA.estimateNumberOfSimplifiedInstructions(i);
- NumberOfOptimizedInstructions += UA.estimateNumberOfDeadInstructions();
- }
+ UA.estimateNumberOfOptimizedInstructions(i);
+
NumberOfOptimizedInstructions *= TripCount / IterationsNumberForEstimate;
return NumberOfOptimizedInstructions;
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
}
+// Returns true if the loop has a runtime unroll(disable) pragma.
+static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
+ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
+}
+
// If loop has an unroll_count pragma return the (necessarily
// positive) value from the pragma. Otherwise return 0.
static unsigned UnrollCountPragmaValue(const Loop *L) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime;
+ if (HasRuntimeUnrollDisablePragma(L)) {
+ AllowRuntime = false;
+ }
if (Unrolling == Partial) {
bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
if (!AllowPartial && !CountSetExplicitly) {