Micro-optimize this.

[oota-llvm.git] / lib / Analysis / InlineCost.cpp
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp

index acc3f202f2795720c9df60f70c477ec56261f3c5..98dbb69fe6a56e89b7e346fce879861ab2545c35 100644 (file)
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -159,15 +159,27 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
        // it.  This is a hack because we depend on the user marking their local
        // variables as volatile if they are live across a setjmp call, and they
        // probably won't do this in callers.
-      if (Function *F = CS.getCalledFunction())
+      if (Function *F = CS.getCalledFunction()) {
          if (F->isDeclaration() && 
              (F->getName() == "setjmp" || F->getName() == "_setjmp"))
            NeverInline = true;
+       
+        // If this call is to the function itself, then the function is recursive.
+        // Inlining it into other functions is a bad idea, because this is
+        // basically just a form of loop peeling, and our metrics aren't useful
+        // for that case.
+        if (F == BB->getParent())
+          NeverInline = true;
+      }
  
        if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
          // Each argument to a call takes on average one instruction to set up.
          NumInsts += CS.arg_size();
-        ++NumCalls;
+
+        // We don't want inline asm to count as a call - that would prevent loop
+        // unrolling. The argument setup cost is still real, though.
+        if (!isa<InlineAsm>(CS.getCalledValue()))
+          ++NumCalls;
        }
      }
      
@@ -251,9 +263,15 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
  //
  InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
                                 SmallPtrSet<const Function*, 16> &NeverInline) {
+  return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+} // Forwards to the overload taking an explicit Callee, passing CS's callee.
+
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+                               Function *Callee,
+                               SmallPtrSet<const Function*, 16> &NeverInline) {
    Instruction *TheCall = CS.getInstruction();
-  Function *Callee = CS.getCalledFunction();
    Function *Caller = TheCall->getParent()->getParent();
+  bool isDirectCall = CS.getCalledFunction() == Callee;
  
    // Don't inline functions which can be redefined at link-time to mean
    // something else.  Don't inline functions marked noinline or call sites
@@ -263,23 +281,16 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
        CS.isNoInline())
      return llvm::InlineCost::getNever();
  
-  // Don't inline directly recursive calls, for now. Inlining a directly
-  // recursive call is effectively unrolling a loop, so it calls for different
-  // heuristics, which aren't implemented yet. Until then, err on the
-  // conservative side.
-  if (Callee == Caller)
-    return llvm::InlineCost::getNever();
-
    // InlineCost - This value measures how good of an inline candidate this call
    // site is to inline.  A lower inline cost makes it more likely for the call to
    // be inlined.  This value may go negative.
    //
    int InlineCost = 0;
-  
+
    // If there is only one call of the function, and it has internal linkage,
    // make it almost guaranteed to be inlined.
    //
-  if (Callee->hasLocalLinkage() && Callee->hasOneUse())
+  if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
      InlineCost += InlineConstants::LastCallToStaticBonus;
    
    // If this function uses the coldcc calling convention, prefer not to inline
@@ -319,8 +330,13 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
      FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
  
      // If we haven't calculated this information yet, do so now.
-    if (CallerFI.Metrics.NumBlocks == 0)
+    if (CallerFI.Metrics.NumBlocks == 0) {
        CallerFI.analyzeFunction(Caller);
+     
+      // Recompute the CalleeFI pointer: looking up Caller's entry in
+      // CachedFunctionInfo could have invalidated it.
+      CalleeFI = &CachedFunctionInfo[Callee];
+    }
  
      // Don't inline a callee with dynamic alloca into a caller without them.
      // Functions containing dynamic alloca's are inefficient in various ways;
@@ -426,6 +442,8 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
      return;
    }
    
+  // Since CalleeMetrics were already calculated, we know that the CallerMetrics
+  // reference isn't invalidated: both were in the DenseMap.  
    CallerMetrics.NeverInline |= CalleeMetrics.NeverInline;
    CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
  
@@ -441,6 +459,11 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
    else
      CallerMetrics.NumInsts = 0;
    
-  // We are not updating the argumentweights. We have already determined that
+  // We are not updating the argument weights. We have already determined that
    // Caller is a fairly large function, so we accept the loss of precision.
  }
+
+/// clear - Empty the cache of inline costs by clearing CachedFunctionInfo.
+void InlineCostAnalyzer::clear() {
+  CachedFunctionInfo.clear();
+}