From 8d84d5b62cdf2a772d51338136c7022a6e1ff931 Mon Sep 17 00:00:00 2001
From: Evan Cheng <evan.cheng@apple.com>
Date: Mon, 24 Mar 2008 06:37:48 +0000
Subject: [PATCH] Increasing the inline limit from (overly conservative) 200 to
 300. Given each BB costs 20 and each instruction costs 5, 200 means a 4 BB
 function + 24 instructions (actually less because caller's size also
 contributes to it).

Furthermore, double the limit when more than 10% of the callee instructions are vector instructions. Multimedia kernels tend to love inlining.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48725 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Transforms/IPO/InlinerPass.h  |  5 ++++
 include/llvm/Transforms/Utils/InlineCost.h | 13 +++++++--
 lib/Transforms/IPO/InlineSimple.cpp        |  3 ++
 lib/Transforms/IPO/Inliner.cpp             | 10 ++++---
 lib/Transforms/Utils/InlineCost.cpp        | 34 ++++++++++++++++++----
 5 files changed, 54 insertions(+), 11 deletions(-)
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index 26cf4e44a26..d34641def09 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -55,6 +55,11 @@ struct Inliner : public CallGraphSCCPass {
   ///
   virtual int getInlineCost(CallSite CS) = 0;
 
+  // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+  // higher threshold to determine if the function call should be inlined.
+  ///
+  virtual float getInlineFudgeFactor(CallSite CS) = 0;
+
 private:
   // InlineThreshold - Cache the value here for easy access.
   unsigned InlineThreshold;
diff --git a/include/llvm/Transforms/Utils/InlineCost.h b/include/llvm/Transforms/Utils/InlineCost.h
index 373dc7bd190..6018a803c6f 100644
--- a/include/llvm/Transforms/Utils/InlineCost.h
+++ b/include/llvm/Transforms/Utils/InlineCost.h
@@ -41,6 +41,10 @@ namespace llvm {
       // NumInsts, NumBlocks - Keep track of how large each function is, which is
       // used to estimate the code size cost of inlining it.
       unsigned NumInsts, NumBlocks;
+
+      // NumVectorInsts - Keep track how many instrctions produce vector values.
+      // The inliner is being more aggressive with inlining vector kernels.
+      unsigned NumVectorInsts;
       
       // ArgumentWeights - Each formal argument of the function is inspected to
       // see if it is used in any contexts where making it a constant or alloca
@@ -48,7 +52,7 @@ namespace llvm {
       // entry here.
       std::vector<ArgInfo> ArgumentWeights;
       
-      FunctionInfo() : NumInsts(0), NumBlocks(0) {}
+      FunctionInfo() : NumInsts(0), NumBlocks(0), NumVectorInsts(0) {}
       
       /// analyzeFunction - Fill in the current structure with information gleaned
       /// from the specified function.
@@ -73,7 +77,12 @@ namespace llvm {
     // getInlineCost - The heuristic used to determine if we should inline the
     // function call or not.
     //
-    int getInlineCost(CallSite CS, SmallPtrSet<const Function *, 16> &NeverInline);
+    int getInlineCost(CallSite CS,
+                      SmallPtrSet<const Function *, 16> &NeverInline);
+
+    // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+    // higher threshold to determine if the function call should be inlined.
+    float getInlineFudgeFactor(CallSite CS);
   };
 }
 
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index fe0ea5a4e89..a007103a599 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -40,6 +40,9 @@ namespace {
     int getInlineCost(CallSite CS) {
       return CA.getInlineCost(CS, NeverInline);
     }
+    float getInlineFudgeFactor(CallSite CS) {
+      return CA.getInlineFudgeFactor(CS);
+    }
     virtual bool doInitialization(CallGraph &CG);
   };
   char SimpleInliner::ID = 0;
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index acd360464ca..f33f368fc0e 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -31,9 +31,9 @@ STATISTIC(NumInlined, "Number of functions inlined");
 STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
 
 namespace {
-  cl::opt<int>             // FIXME: 200 is VERY conservative
-  InlineLimit("inline-threshold", cl::Hidden, cl::init(200),
-        cl::desc("Control the amount of inlining to perform (default = 200)"));
+  cl::opt<int>
+  InlineLimit("inline-threshold", cl::Hidden, cl::init(400),
+        cl::desc("Control the amount of inlining to perform (default = 400)"));
 }
 
 Inliner::Inliner(const void *ID) 
@@ -140,7 +140,9 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {
         // try to do so.
         CallSite CS = CallSites[CSi];
         int InlineCost = getInlineCost(CS);
-        if (InlineCost >= (int)InlineThreshold) {
+        float FudgeFactor = getInlineFudgeFactor(CS);
+
+        if (InlineCost >= (int)(InlineThreshold * FudgeFactor)) {
           DOUT << "    NOT Inlining: cost=" << InlineCost
                << ", Call: " << *CS.getInstruction();
         } else {
diff --git a/lib/Transforms/Utils/InlineCost.cpp b/lib/Transforms/Utils/InlineCost.cpp
index 1e1d1e410f3..4349d0e5dfd 100644
--- a/lib/Transforms/Utils/InlineCost.cpp
+++ b/lib/Transforms/Utils/InlineCost.cpp
@@ -93,7 +93,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::
 /// analyzeFunction - Fill in the current structure with information gleaned
 /// from the specified function.
 void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
-  unsigned NumInsts = 0, NumBlocks = 0;
+  unsigned NumInsts = 0, NumBlocks = 0, NumVectorInsts = 0;
 
   // Look at the size of the callee.  Each basic block counts as 20 units, and
   // each instruction counts as 5.
@@ -101,6 +101,11 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
     for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
          II != E; ++II) {
       if (isa<DbgInfoIntrinsic>(II)) continue;  // Debug intrinsics don't count.
+      if (isa<PHINode>(II)) continue;           // PHI nodes don't count.
+
+      if (isa<InsertElementInst>(II) || isa<ExtractElementInst>(II) ||
+          isa<ShuffleVectorInst>(II) || isa<VectorType>(II->getType()))
+        ++NumVectorInsts; 
       
       // Noop casts, including ptr <-> int,  don't count.
       if (const CastInst *CI = dyn_cast<CastInst>(II)) {
@@ -108,7 +113,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
             isa<PtrToIntInst>(CI))
           continue;
       } else if (const GetElementPtrInst *GEPI =
-                         dyn_cast<GetElementPtrInst>(II)) {
+                 dyn_cast<GetElementPtrInst>(II)) {
         // If a GEP has all constant indices, it will probably be folded with
         // a load/store.
         bool AllConstant = true;
@@ -126,8 +131,9 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
     ++NumBlocks;
   }
 
-  this->NumBlocks = NumBlocks;
-  this->NumInsts  = NumInsts;
+  this->NumBlocks      = NumBlocks;
+  this->NumInsts       = NumInsts;
+  this->NumVectorInsts = NumVectorInsts;
 
   // Check out all of the arguments to the function, figuring out how much
   // code can be eliminated if one of the arguments is a constant.
@@ -233,10 +239,28 @@ int InlineCostAnalyzer::getInlineCost(CallSite CS,
   //
   InlineCost += Caller->size()/20;
   
-  
   // Look at the size of the callee.  Each basic block counts as 20 units, and
   // each instruction counts as 5.
   InlineCost += CalleeFI.NumInsts*5 + CalleeFI.NumBlocks*20;
+
   return InlineCost;
 }
 
+// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+// higher threshold to determine if the function call should be inlined.
+float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
+  Function *Callee = CS.getCalledFunction();
+  
+  // Get information about the callee...
+  FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+  
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI.NumBlocks == 0)
+    CalleeFI.analyzeFunction(Callee);
+
+  // Be more aggressive if the function contains a good chunk (if it mades up
+  // at least 10% of the instructions) of vector instructions.
+  if (CalleeFI.NumVectorInsts > CalleeFI.NumInsts/10)
+    return 1.5f;
+  return 1.0f;
+}
-- 
2.34.1