From 8d84d5b62cdf2a772d51338136c7022a6e1ff931 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 24 Mar 2008 06:37:48 +0000 Subject: [PATCH] Increasing the inline limit from (overly conservative) 200 to 300. Given each BB costs 20 and each instruction costs 5, 200 means a 4 BB function + 24 instructions (actually less because caller's size also contributes to it). Furthermore, double the limit when more than 10% of the callee instructions are vector instructions. Multimedia kernels tend to love inlining. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48725 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Transforms/IPO/InlinerPass.h | 5 ++++ include/llvm/Transforms/Utils/InlineCost.h | 13 +++++++-- lib/Transforms/IPO/InlineSimple.cpp | 3 ++ lib/Transforms/IPO/Inliner.cpp | 10 ++++--- lib/Transforms/Utils/InlineCost.cpp | 34 ++++++++++++++++++---- 5 files changed, 54 insertions(+), 11 deletions(-) diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h index 26cf4e44a26..d34641def09 100644 --- a/include/llvm/Transforms/IPO/InlinerPass.h +++ b/include/llvm/Transforms/IPO/InlinerPass.h @@ -55,6 +55,11 @@ struct Inliner : public CallGraphSCCPass { /// virtual int getInlineCost(CallSite CS) = 0; + // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a + // higher threshold to determine if the function call should be inlined. + /// + virtual float getInlineFudgeFactor(CallSite CS) = 0; + private: // InlineThreshold - Cache the value here for easy access. unsigned InlineThreshold; diff --git a/include/llvm/Transforms/Utils/InlineCost.h b/include/llvm/Transforms/Utils/InlineCost.h index 373dc7bd190..6018a803c6f 100644 --- a/include/llvm/Transforms/Utils/InlineCost.h +++ b/include/llvm/Transforms/Utils/InlineCost.h @@ -41,6 +41,10 @@ namespace llvm { // NumInsts, NumBlocks - Keep track of how large each function is, which is // used to estimate the code size cost of inlining it. unsigned NumInsts, NumBlocks; + + // NumVectorInsts - Keep track how many instrctions produce vector values. + // The inliner is being more aggressive with inlining vector kernels. + unsigned NumVectorInsts; // ArgumentWeights - Each formal argument of the function is inspected to // see if it is used in any contexts where making it a constant or alloca @@ -48,7 +52,7 @@ namespace llvm { // entry here. std::vector ArgumentWeights; - FunctionInfo() : NumInsts(0), NumBlocks(0) {} + FunctionInfo() : NumInsts(0), NumBlocks(0), NumVectorInsts(0) {} /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. @@ -73,7 +77,12 @@ namespace llvm { // getInlineCost - The heuristic used to determine if we should inline the // function call or not. // - int getInlineCost(CallSite CS, SmallPtrSet &NeverInline); + int getInlineCost(CallSite CS, + SmallPtrSet &NeverInline); + + // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a + // higher threshold to determine if the function call should be inlined. + float getInlineFudgeFactor(CallSite CS); }; } diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index fe0ea5a4e89..a007103a599 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -40,6 +40,9 @@ namespace { int getInlineCost(CallSite CS) { return CA.getInlineCost(CS, NeverInline); } + float getInlineFudgeFactor(CallSite CS) { + return CA.getInlineFudgeFactor(CS); + } virtual bool doInitialization(CallGraph &CG); }; char SimpleInliner::ID = 0; diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index acd360464ca..f33f368fc0e 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -31,9 +31,9 @@ STATISTIC(NumInlined, "Number of functions inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); namespace { - cl::opt // FIXME: 200 is VERY conservative - InlineLimit("inline-threshold", cl::Hidden, cl::init(200), - cl::desc("Control the amount of inlining to perform (default = 200)")); + cl::opt + InlineLimit("inline-threshold", cl::Hidden, cl::init(400), + cl::desc("Control the amount of inlining to perform (default = 400)")); } Inliner::Inliner(const void *ID) @@ -140,7 +140,9 @@ bool Inliner::runOnSCC(const std::vector &SCC) { // try to do so. CallSite CS = CallSites[CSi]; int InlineCost = getInlineCost(CS); - if (InlineCost >= (int)InlineThreshold) { + float FudgeFactor = getInlineFudgeFactor(CS); + + if (InlineCost >= (int)(InlineThreshold * FudgeFactor)) { DOUT << " NOT Inlining: cost=" << InlineCost << ", Call: " << *CS.getInstruction(); } else { diff --git a/lib/Transforms/Utils/InlineCost.cpp b/lib/Transforms/Utils/InlineCost.cpp index 1e1d1e410f3..4349d0e5dfd 100644 --- a/lib/Transforms/Utils/InlineCost.cpp +++ b/lib/Transforms/Utils/InlineCost.cpp @@ -93,7 +93,7 @@ unsigned InlineCostAnalyzer::FunctionInfo:: /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { - unsigned NumInsts = 0, NumBlocks = 0; + unsigned NumInsts = 0, NumBlocks = 0, NumVectorInsts = 0; // Look at the size of the callee. Each basic block counts as 20 units, and // each instruction counts as 5. @@ -101,6 +101,11 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { if (isa(II)) continue; // Debug intrinsics don't count. + if (isa(II)) continue; // PHI nodes don't count. + + if (isa(II) || isa(II) || + isa(II) || isa(II->getType())) + ++NumVectorInsts; // Noop casts, including ptr <-> int, don't count. if (const CastInst *CI = dyn_cast(II)) { @@ -108,7 +113,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { isa(CI)) continue; } else if (const GetElementPtrInst *GEPI = - dyn_cast(II)) { + dyn_cast(II)) { // If a GEP has all constant indices, it will probably be folded with // a load/store. bool AllConstant = true; @@ -126,8 +131,9 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { ++NumBlocks; } - this->NumBlocks = NumBlocks; - this->NumInsts = NumInsts; + this->NumBlocks = NumBlocks; + this->NumInsts = NumInsts; + this->NumVectorInsts = NumVectorInsts; // Check out all of the arguments to the function, figuring out how much // code can be eliminated if one of the arguments is a constant. @@ -233,10 +239,28 @@ int InlineCostAnalyzer::getInlineCost(CallSite CS, // InlineCost += Caller->size()/20; - // Look at the size of the callee. Each basic block counts as 20 units, and // each instruction counts as 5. InlineCost += CalleeFI.NumInsts*5 + CalleeFI.NumBlocks*20; + return InlineCost; } +// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a +// higher threshold to determine if the function call should be inlined. +float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { + Function *Callee = CS.getCalledFunction(); + + // Get information about the callee... + FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI.NumBlocks == 0) + CalleeFI.analyzeFunction(Callee); + + // Be more aggressive if the function contains a good chunk (if it mades up + // at least 10% of the instructions) of vector instructions. + if (CalleeFI.NumVectorInsts > CalleeFI.NumInsts/10) + return 1.5f; + return 1.0f; +} -- 2.34.1