return true;
}
+/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
+
+
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraph>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
// Scan through and identify all call sites ahead of time so that we only
// inline call sites in the original functions, not call sites that result
// from inlining other functions.
- SmallVector<CallSite, 16> CallSites;
+ SmallVector<std::pair<CallSite, int>, 16> CallSites;
+
+ // When inlining a callee produces new call sites, we want to keep track of
+ // the fact that they were inlined from the callee. This allows us to avoid
+ // infinite inlining in some obscure cases. To represent this, we use an
+ // index into the InlineHistory vector.
+ SmallVector<std::pair<Function*, int>, 8> InlineHistory;
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
continue;
- CallSites.push_back(CS);
+ CallSites.push_back(std::make_pair(CS, -1));
}
}
// current SCC to the end of the list.
unsigned FirstCallInSCC = CallSites.size();
for (unsigned i = 0; i < FirstCallInSCC; ++i)
- if (Function *F = CallSites[i].getCalledFunction())
+ if (Function *F = CallSites[i].first.getCalledFunction())
if (SCCFunctions.count(F))
std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
// Iterate over the outer loop because inlining functions can cause indirect
// calls to become direct calls.
for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
- CallSite CS = CallSites[CSi];
+ CallSite CS = CallSites[CSi].first;
Function *Caller = CS.getCaller();
Function *Callee = CS.getCalledFunction();
// We can only inline direct calls to non-declarations.
if (Callee == 0 || Callee->isDeclaration()) continue;
+ // If this call sites was obtained by inlining another function, verify
+ // that the include path for the function did not include the callee
+ // itself. If so, we'd be recursively inlinling the same function,
+ // which would provide the same callsites, which would cause us to
+ // infinitely inline.
+ int InlineHistoryID = CallSites[CSi].second;
+ if (InlineHistoryID != -1 &&
+ InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+ continue;
+
+
// If the policy determines that we should inline this function,
// try to do so.
if (!shouldInline(CS))
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas))
continue;
++NumInlined;
-
+
// If inlining this function devirtualized any call sites, throw them
// onto our worklist to process. They are useful inline candidates.
- for (unsigned i = 0, e = InlineInfo.DevirtualizedCalls.size();
- i != e; ++i) {
- Value *Ptr = InlineInfo.DevirtualizedCalls[i];
- CallSites.push_back(CallSite(Ptr));
+ if (!InlineInfo.DevirtualizedCalls.empty()) {
+ // Create a new inline history entry for this, so that we remember
+ // that these new callsites came about due to inlining Callee.
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
+
+ for (unsigned i = 0, e = InlineInfo.DevirtualizedCalls.size();
+ i != e; ++i) {
+ Value *Ptr = InlineInfo.DevirtualizedCalls[i];
+ CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+ }
}
// Update the cached cost info with the inlined call.
; This effectively is just peeling off the first iteration of a loop, and the
; inliner heuristics are not set up for this.
-; RUN: opt -inline %s -S | grep "call void @foo(i32 42)"
+; RUN: opt -inline %s -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.3"
define internal void @foo(i32 %x) nounwind ssp {
entry:
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%0 = icmp slt i32 %x, 0 ; <i1> [#uses=1]
br i1 %0, label %return, label %bb
ret void
}
+
+;; CHECK: @bonk
+;; CHECK: call void @foo(i32 42)
define void @bonk() nounwind ssp {
entry:
call void @foo(i32 42) nounwind ssp
ret void
}
+
+
+
+;; Here is an indirect case that should not be infinitely inlined.
+
+define internal void @f1(i32 %x, i8* %Foo, i8* %Bar) nounwind ssp {
+entry:
+ %0 = bitcast i8* %Bar to void (i32, i8*, i8*)*
+ %1 = sub nsw i32 %x, 1
+ call void %0(i32 %1, i8* %Foo, i8* %Bar) nounwind
+ volatile store i32 42, i32* @g, align 4
+ ret void
+}
+
+define internal void @f2(i32 %x, i8* %Foo, i8* %Bar) nounwind ssp {
+entry:
+ %0 = icmp slt i32 %x, 0 ; <i1> [#uses=1]
+ br i1 %0, label %return, label %bb
+
+bb: ; preds = %entry
+ %1 = bitcast i8* %Foo to void (i32, i8*, i8*)* ; <void (i32, i8*, i8*)*> [#uses=1]
+ call void %1(i32 %x, i8* %Foo, i8* %Bar) nounwind
+ volatile store i32 13, i32* @g, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+}
+
+
+; CHECK: @top_level
+; CHECK: call void @f2(i32 122
+; Here we inline one instance of the cycle, but we don't want to completely
+; unroll it.
+define void @top_level() nounwind ssp {
+entry:
+ call void @f2(i32 123, i8* bitcast (void (i32, i8*, i8*)* @f1 to i8*), i8* bitcast (void (i32, i8*, i8*)* @f2 to i8*)) nounwind ssp
+ ret void
+}