Clean up handling of always-inline functions in the inliner.

author Bob Wilson <bob.wilson@apple.com>

Mon, 19 Nov 2012 07:04:35 +0000 (07:04 +0000)

committer Bob Wilson <bob.wilson@apple.com>

Mon, 19 Nov 2012 07:04:35 +0000 (07:04 +0000)
author Bob Wilson <bob.wilson@apple.com>
Mon, 19 Nov 2012 07:04:35 +0000 (07:04 +0000)
committer Bob Wilson <bob.wilson@apple.com>
Mon, 19 Nov 2012 07:04:35 +0000 (07:04 +0000)
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h

index af07d38169055708102b207e9f82e1424ad7242a..82a3a566c920718a3e42ec730c9dc32304f5c2f1 100644 (file)
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -129,6 +129,9 @@ namespace llvm {
      //  Note: This is used by out-of-tree passes, please do not remove without
      //  adding a replacement API.
      InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold);
+
+    /// \brief Minimal filter to detect invalid constructs for inlining.
+    bool isInlineViable(Function &Callee);
    };
  }
  
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp

index 0da5413463832f4ddca74602434eb9166c66cfc1..458e25503d7b2bc4fd50114f3abb8337f1c9d22f 100644 (file)
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -49,7 +49,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  
    int Threshold;
    int Cost;
-  const bool AlwaysInline;
  
    bool IsCallerRecursive;
    bool IsRecursiveCall;
@@ -128,7 +127,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  public:
    CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold)
      : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
-      AlwaysInline(F.getFnAttributes().hasAttribute(Attributes::AlwaysInline)),
        IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
        NumInstructions(0), NumVectorInstructions(0),
@@ -142,7 +140,6 @@ public:
  
    int getThreshold() { return Threshold; }
    int getCost() { return Cost; }
-  bool isAlwaysInline() { return AlwaysInline; }
  
    // Keep a bunch of stats about the cost savings found so we can print them
    // out when debugging.
@@ -281,9 +278,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
                        Ty->getPrimitiveSizeInBits());
    }
  
-  // We will happily inline static alloca instructions or dynamic alloca
-  // instructions in always-inline situations.
-  if (AlwaysInline || I.isStaticAlloca())
+  // We will happily inline static alloca instructions.
+  if (I.isStaticAlloca())
      return Base::visitAlloca(I);
  
    // FIXME: This is overly conservative. Dynamic allocas are inefficient for
@@ -743,7 +739,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
  
      // Check if we've past the threshold so we don't spin in huge basic
      // blocks that will never inline.
-    if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+    if (Cost > (Threshold + VectorBonus))
        return false;
    }
  
@@ -805,70 +801,69 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
    int SingleBBBonus = Threshold / 2;
    Threshold += SingleBBBonus;
  
-  // Unless we are always-inlining, perform some tweaks to the cost and
-  // threshold based on the direct callsite information.
-  if (!AlwaysInline) {
-    // We want to more aggressively inline vector-dense kernels, so up the
-    // threshold, and we'll lower it if the % of vector instructions gets too
-    // low.
-    assert(NumInstructions == 0);
-    assert(NumVectorInstructions == 0);
-    FiftyPercentVectorBonus = Threshold;
-    TenPercentVectorBonus = Threshold / 2;
-
-    // Give out bonuses per argument, as the instructions setting them up will
-    // be gone after inlining.
-    for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
-      if (TD && CS.isByValArgument(I)) {
-        // We approximate the number of loads and stores needed by dividing the
-        // size of the byval type by the target's pointer size.
-        PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
-        unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
-        unsigned PointerSize = TD->getPointerSizeInBits();
-        // Ceiling division.
-        unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
-
-        // If it generates more than 8 stores it is likely to be expanded as an
-        // inline memcpy so we take that as an upper bound. Otherwise we assume
-        // one load and one store per word copied.
-        // FIXME: The maxStoresPerMemcpy setting from the target should be used
-        // here instead of a magic number of 8, but it's not available via
-        // DataLayout.
-        NumStores = std::min(NumStores, 8U);
-
-        Cost -= 2 * NumStores * InlineConstants::InstrCost;
-      } else {
-        // For non-byval arguments subtract off one instruction per call
-        // argument.
-        Cost -= InlineConstants::InstrCost;
-      }
+  // Perform some tweaks to the cost and threshold based on the direct
+  // callsite information.
+
+  // We want to more aggressively inline vector-dense kernels, so up the
+  // threshold, and we'll lower it if the % of vector instructions gets too
+  // low.
+  assert(NumInstructions == 0);
+  assert(NumVectorInstructions == 0);
+  FiftyPercentVectorBonus = Threshold;
+  TenPercentVectorBonus = Threshold / 2;
+
+  // Give out bonuses per argument, as the instructions setting them up will
+  // be gone after inlining.
+  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
+    if (TD && CS.isByValArgument(I)) {
+      // We approximate the number of loads and stores needed by dividing the
+      // size of the byval type by the target's pointer size.
+      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+      unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
+      unsigned PointerSize = TD->getPointerSizeInBits();
+      // Ceiling division.
+      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
+
+      // If it generates more than 8 stores it is likely to be expanded as an
+      // inline memcpy so we take that as an upper bound. Otherwise we assume
+      // one load and one store per word copied.
+      // FIXME: The maxStoresPerMemcpy setting from the target should be used
+      // here instead of a magic number of 8, but it's not available via
+      // DataLayout.
+      NumStores = std::min(NumStores, 8U);
+
+      Cost -= 2 * NumStores * InlineConstants::InstrCost;
+    } else {
+      // For non-byval arguments subtract off one instruction per call
+      // argument.
+      Cost -= InlineConstants::InstrCost;
      }
+  }
  
-    // If there is only one call of the function, and it has internal linkage,
-    // the cost of inlining it drops dramatically.
-    if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
-      Cost += InlineConstants::LastCallToStaticBonus;
-
-    // If the instruction after the call, or if the normal destination of the
-    // invoke is an unreachable instruction, the function is noreturn. As such,
-    // there is little point in inlining this unless there is literally zero
-    // cost.
-    Instruction *Instr = CS.getInstruction();
-    if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
-      if (isa<UnreachableInst>(II->getNormalDest()->begin()))
-        Threshold = 1;
-    } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
+  // If there is only one call of the function, and it has internal linkage,
+  // the cost of inlining it drops dramatically.
+  if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
+    Cost += InlineConstants::LastCallToStaticBonus;
+
+  // If the instruction after the call, or if the normal destination of the
+  // invoke is an unreachable instruction, the function is noreturn. As such,
+  // there is little point in inlining this unless there is literally zero
+  // cost.
+  Instruction *Instr = CS.getInstruction();
+  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
+    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
        Threshold = 1;
+  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
+    Threshold = 1;
  
-    // If this function uses the coldcc calling convention, prefer not to inline
-    // it.
-    if (F.getCallingConv() == CallingConv::Cold)
-      Cost += InlineConstants::ColdccPenalty;
+  // If this function uses the coldcc calling convention, prefer not to inline
+  // it.
+  if (F.getCallingConv() == CallingConv::Cold)
+    Cost += InlineConstants::ColdccPenalty;
  
-    // Check if we're done. This can happen due to bonuses and penalties.
-    if (Cost > Threshold)
-      return false;
-  }
+  // Check if we're done. This can happen due to bonuses and penalties.
+  if (Cost > Threshold)
+    return false;
  
    if (F.empty())
      return true;
@@ -930,7 +925,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
      // Bail out the moment we cross the threshold. This means we'll under-count
      // the cost, but only when undercounting doesn't matter.
-    if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+    if (Cost > (Threshold + VectorBonus))
        break;
  
      BasicBlock *BB = BBWorklist[Idx];
@@ -1015,7 +1010,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
  
    Threshold += VectorBonus;
  
-  return AlwaysInline || Cost < Threshold;
+  return Cost < Threshold;
  }
  
  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1040,10 +1035,22 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) {
  
  InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
                                               int Threshold) {
+  // Cannot inline indirect calls.
+  if (!Callee)
+    return llvm::InlineCost::getNever();
+
+  // Calls to functions with always-inline attributes should be inlined
+  // whenever possible.
+  if (Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline)) {
+    if (isInlineViable(*Callee))
+      return llvm::InlineCost::getAlways();
+    return llvm::InlineCost::getNever();
+  }
+
    // Don't inline functions which can be redefined at link-time to mean
    // something else.  Don't inline functions marked noinline or call sites
    // marked noinline.
-  if (!Callee || Callee->mayBeOverridden() ||
+  if (Callee->mayBeOverridden() ||
        Callee->getFnAttributes().hasAttribute(Attributes::NoInline) ||
        CS.isNoInline())
      return llvm::InlineCost::getNever();
@@ -1059,9 +1066,36 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
    // Check if there was a reason to force inlining or no inlining.
    if (!ShouldInline && CA.getCost() < CA.getThreshold())
      return InlineCost::getNever();
-  if (ShouldInline && (CA.isAlwaysInline() ||
-                       CA.getCost() >= CA.getThreshold()))
+  if (ShouldInline && CA.getCost() >= CA.getThreshold())
      return InlineCost::getAlways();
  
    return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
  }
+
+bool InlineCostAnalyzer::isInlineViable(Function &F) {
+  bool ReturnsTwice =F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice);
+  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+    // Disallow inlining of functions which contain an indirect branch.
+    if (isa<IndirectBrInst>(BI->getTerminator()))
+      return false;
+
+    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+         ++II) {
+      CallSite CS(II);
+      if (!CS)
+        continue;
+
+      // Disallow recursive calls.
+      if (&F == CS.getCalledFunction())
+        return false;
+
+      // Disallow calls which expose returns-twice to a function not previously
+      // attributed as such.
+      if (!ReturnsTwice && CS.isCall() &&
+          cast<CallInst>(CS.getInstruction())->canReturnTwice())
+        return false;
+    }
+  }
+
+  return true;
+}
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp

index b1c36c15db0bcaaaf6d5055256e228228ca22744..6f4b810acc933c19a2062a2f06c7a0bfd85d2c59 100644 (file)
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -32,6 +32,7 @@ namespace {
  
    // AlwaysInliner only inlines functions that are mark as "always inline".
    class AlwaysInliner : public Inliner {
+    InlineCostAnalyzer CA;
    public:
      // Use extremely low threshold.
      AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) {
@@ -63,35 +64,6 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
    return new AlwaysInliner(InsertLifetime);
  }
  
-/// \brief Minimal filter to detect invalid constructs for inlining.
-static bool isInlineViable(Function &F) {
-  bool ReturnsTwice =F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice);
-  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
-    // Disallow inlining of functions which contain an indirect branch.
-    if (isa<IndirectBrInst>(BI->getTerminator()))
-      return false;
-
-    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
-         ++II) {
-      CallSite CS(II);
-      if (!CS)
-        continue;
-
-      // Disallow recursive calls.
-      if (&F == CS.getCalledFunction())
-        return false;
-
-      // Disallow calls which expose returns-twice to a function not previously
-      // attributed as such.
-      if (!ReturnsTwice && CS.isCall() &&
-          cast<CallInst>(CS.getInstruction())->canReturnTwice())
-        return false;
-    }
-  }
-
-  return true;
-}
-
  /// \brief Get the inline cost for the always-inliner.
  ///
  /// The always inliner *only* handles functions which are marked with the
@@ -106,27 +78,21 @@ static bool isInlineViable(Function &F) {
  /// likely not worth it in practice.
  InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
    Function *Callee = CS.getCalledFunction();
-  // We assume indirect calls aren't calling an always-inline function.
-  if (!Callee) return InlineCost::getNever();
-
-  // We can't inline calls to external functions.
-  // FIXME: We shouldn't even get here.
-  if (Callee->isDeclaration()) return InlineCost::getNever();
-
-  // Return never for anything not marked as always inline.
-  if (!Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline))
-    return InlineCost::getNever();
  
-  // Do some minimal analysis to preclude non-viable functions.
-  if (!isInlineViable(*Callee))
-    return InlineCost::getNever();
+  // Only inline direct calls to functions with always-inline attributes
+  // that are viable for inlining. FIXME: We shouldn't even get here for
+  // declarations.
+  if (Callee && !Callee->isDeclaration() &&
+      Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline) &&
+      CA.isInlineViable(*Callee))
+    return InlineCost::getAlways();
  
-  // Otherwise, force inlining.
-  return InlineCost::getAlways();
+  return InlineCost::getNever();
  }
  
  // doInitialization - Initializes the vector of functions that have not
  // been annotated with the "always inline" attribute.
  bool AlwaysInliner::doInitialization(CallGraph &CG) {
+  CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
    return false;
  }
author	Bob Wilson <bob.wilson@apple.com>
	Mon, 19 Nov 2012 07:04:35 +0000 (07:04 +0000)
committer	Bob Wilson <bob.wilson@apple.com>
	Mon, 19 Nov 2012 07:04:35 +0000 (07:04 +0000)
include/llvm/Analysis/InlineCost.h		patch \| blob \| history
lib/Analysis/InlineCost.cpp		patch \| blob \| history
lib/Transforms/IPO/InlineAlways.cpp		patch \| blob \| history