///
virtual int getInlineCost(CallSite CS) = 0;
+ /// getInlineFudgeFactor - Return a factor greater than 1.0 if the inliner
+ /// should use a higher threshold to decide whether to inline the call.
+ ///
+ virtual float getInlineFudgeFactor(CallSite CS) = 0;
+
private:
// InlineThreshold - Cache the value here for easy access.
unsigned InlineThreshold;
// NumInsts, NumBlocks - Keep track of how large each function is, which is
// used to estimate the code size cost of inlining it.
unsigned NumInsts, NumBlocks;
+
+ // NumVectorInsts - Keep track of how many instructions produce vector
+ // values. The inliner is more aggressive about inlining vector kernels.
+ unsigned NumVectorInsts;
// ArgumentWeights - Each formal argument of the function is inspected to
// see if it is used in any contexts where making it a constant or alloca
// entry here.
std::vector<ArgInfo> ArgumentWeights;
- FunctionInfo() : NumInsts(0), NumBlocks(0) {}
+ FunctionInfo() : NumInsts(0), NumBlocks(0), NumVectorInsts(0) {}
/// analyzeFunction - Fill in the current structure with information gleaned
/// from the specified function.
// getInlineCost - The heuristic used to determine if we should inline the
// function call or not.
//
- int getInlineCost(CallSite CS, SmallPtrSet<const Function *, 16> &NeverInline);
+ int getInlineCost(CallSite CS,
+ SmallPtrSet<const Function *, 16> &NeverInline);
+
+ // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+ // higher threshold to determine if the function call should be inlined.
+ float getInlineFudgeFactor(CallSite CS);
};
}
// getInlineCost - Forward the query to CA.getInlineCost, passing NeverInline
// along with the call site, and return its result unchanged.
int getInlineCost(CallSite CS) {
return CA.getInlineCost(CS, NeverInline);
}
+ float getInlineFudgeFactor(CallSite CS) { // Forwards to CA; result unchanged.
+ return CA.getInlineFudgeFactor(CS);
+ }
virtual bool doInitialization(CallGraph &CG);
};
char SimpleInliner::ID = 0;
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
namespace {
- cl::opt<int> // FIXME: 200 is VERY conservative
- InlineLimit("inline-threshold", cl::Hidden, cl::init(200),
- cl::desc("Control the amount of inlining to perform (default = 200)"));
+ cl::opt<int>
+ InlineLimit("inline-threshold", cl::Hidden, cl::init(400),
+ cl::desc("Control the amount of inlining to perform (default = 400)"));
}
Inliner::Inliner(const void *ID)
// try to do so.
CallSite CS = CallSites[CSi];
int InlineCost = getInlineCost(CS);
- if (InlineCost >= (int)InlineThreshold) {
+ float FudgeFactor = getInlineFudgeFactor(CS);
+
+ if (InlineCost >= (int)(InlineThreshold * FudgeFactor)) {
DOUT << " NOT Inlining: cost=" << InlineCost
<< ", Call: " << *CS.getInstruction();
} else {
/// analyzeFunction - Fill in the current structure with information gleaned
/// from the specified function.
void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
- unsigned NumInsts = 0, NumBlocks = 0;
+ unsigned NumInsts = 0, NumBlocks = 0, NumVectorInsts = 0;
// Look at the size of the callee. Each basic block counts as 20 units, and
// each instruction counts as 5.
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
II != E; ++II) {
if (isa<DbgInfoIntrinsic>(II)) continue; // Debug intrinsics don't count.
+ if (isa<PHINode>(II)) continue; // PHI nodes don't count.
+
+ if (isa<InsertElementInst>(II) || isa<ExtractElementInst>(II) ||
+ isa<ShuffleVectorInst>(II) || isa<VectorType>(II->getType()))
+ ++NumVectorInsts;
// Noop casts, including ptr <-> int, don't count.
if (const CastInst *CI = dyn_cast<CastInst>(II)) {
isa<PtrToIntInst>(CI))
continue;
} else if (const GetElementPtrInst *GEPI =
- dyn_cast<GetElementPtrInst>(II)) {
+ dyn_cast<GetElementPtrInst>(II)) {
// If a GEP has all constant indices, it will probably be folded with
// a load/store.
bool AllConstant = true;
++NumBlocks;
}
- this->NumBlocks = NumBlocks;
- this->NumInsts = NumInsts;
+ this->NumBlocks = NumBlocks;
+ this->NumInsts = NumInsts;
+ this->NumVectorInsts = NumVectorInsts;
// Check out all of the arguments to the function, figuring out how much
// code can be eliminated if one of the arguments is a constant.
//
InlineCost += Caller->size()/20;
-
// Look at the size of the callee. Each basic block counts as 20 units, and
// each instruction counts as 5.
InlineCost += CalleeFI.NumInsts*5 + CalleeFI.NumBlocks*20;
+
return InlineCost;
}
+// getInlineFudgeFactor - Return the factor by which the inliner scales its
+// threshold for this call site: 1.5 for vector-heavy callees, otherwise 1.0.
+float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+
+ // Get information about the callee (NOTE(review): Callee is not null-checked).
+ FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+ // NumBlocks == 0 means the callee has not been analyzed yet; do so now.
+ if (CalleeFI.NumBlocks == 0)
+ CalleeFI.analyzeFunction(Callee);
+
+ // Be more aggressive if vector instructions make up a good chunk of the
+ // callee: more than NumInsts/10 (integer division) of its instructions.
+ if (CalleeFI.NumVectorInsts > CalleeFI.NumInsts/10)
+ return 1.5f;
+ return 1.0f;
+}