"sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"),
cl::desc("Inlined functions that account for more than N% of all samples "
"collected in the parent function, will be inlined again."));
+static cl::opt<double> SampleProfileGlobalHotThreshold( // Hot cutoff, as a percentage of all profile samples.
+ "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"), // Default: 30.
+ cl::desc("Top-level functions that account for more than N% of all samples "
+ "collected in the profile, will be marked as hot for the inliner "
+ "to consider.")); // emitInlineHints compares with >=, so exactly N% also counts as hot.
+static cl::opt<double> SampleProfileGlobalColdThreshold( // Cold cutoff, as a percentage of all profile samples.
+ "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"), // Default: 0.5.
+ cl::desc("Top-level functions that account for less than N% of all samples "
+ "collected in the profile, will be marked as cold for the inliner "
+ "to consider.")); // emitInlineHints compares with <=, so exactly N% also counts as cold.
namespace {
typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
SampleProfileLoader(StringRef Name = SampleProfileFile)
: ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(),
- Samples(nullptr), Filename(Name), ProfileIsValid(false) {
+ Samples(nullptr), Filename(Name), ProfileIsValid(false),
+ TotalCollectedSamples(0) { // Starts at zero; per-function totals are added only after the profile is found valid.
initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
}
const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineHotFunctions(Function &F);
+ bool emitInlineHints(Function &F);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
/// \brief Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid;
+
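+ /// \brief Total number of samples collected in this profile.
+ ///
+ /// This is the sum of the total samples of every function profile returned
+ /// by the profile reader. emitInlineHints divides a function's own total by
+ /// this value to decide whether the function is globally hot or cold.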
+ uint64_t TotalCollectedSamples;
};
class SampleCoverageTracker {
return FS;
}
+/// \brief Tag \p F as globally hot or globally cold from its sample share.
+///
+/// The share is the total sample count of the current function profile
+/// expressed as a percentage of TotalCollectedSamples. A share at or above
+/// SampleProfileGlobalHotThreshold adds the InlineHint attribute; otherwise a
+/// share at or below SampleProfileGlobalColdThreshold adds the Cold attribute.
+/// The hot test runs first, so at most one attribute is added, and an
+/// optimization remark is emitted for whichever one is applied.
+///
+/// FIXME - Marking a whole function hot or cold is sub-optimal; individual
+/// callsites should be annotated instead. That is not currently possible, but
+/// work on the inliner will eventually provide this ability. See
+/// http://reviews.llvm.org/D15003 for details and discussion.
+///
+/// \returns True if either attribute was applied to \p F.
+bool SampleProfileLoader::emitInlineHints(Function &F) {
+  // Without any collected samples there is no denominator to compare against.
+  if (!TotalCollectedSamples)
+    return false;
+
+  const uint64_t FnSamples = Samples->getTotalSamples();
+  const double SamplesPercent = static_cast<double>(FnSamples) /
+                                static_cast<double>(TotalCollectedSamples) *
+                                100.0;
+
+  // Adds Kind to F and reports the decision. The remark text is concatenated
+  // inside the call expression so every temporary it refers to is still alive
+  // when the remark is emitted.
+  auto ApplyHint = [&](llvm::Attribute::AttrKind Kind, const char *Prefix,
+                       double Threshold) {
+    F.addFnAttr(Kind);
+    emitOptimizationRemark(
+        F.getContext(), DEBUG_TYPE, F, DebugLoc(),
+        Twine(Prefix + F.getName() + "' with " +
+              Twine(std::to_string(SamplesPercent)) +
+              "% of samples (threshold: " +
+              Twine(std::to_string(Threshold)) + "%)"));
+    return true;
+  };
+
+  // Hot takes precedence: it is tested first and returns immediately.
+  if (SamplesPercent >= SampleProfileGlobalHotThreshold)
+    return ApplyHint(llvm::Attribute::InlineHint,
+                     "Applied inline hint to globally hot function '",
+                     SampleProfileGlobalHotThreshold);
+
+  if (SamplesPercent <= SampleProfileGlobalColdThreshold)
+    return ApplyHint(llvm::Attribute::Cold,
+                     "Applied cold hint to globally cold function '",
+                     SampleProfileGlobalColdThreshold);
+
+  // Neither threshold was met; F is left untouched.
+  return false;
+}
+
/// \brief Iteratively inline hot callsites of a function.
///
/// Iteratively traverse all callsites of the function \p F, and find if
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");
+ Changed |= emitInlineHints(F);
+
Changed |= inlineHotFunctions(F);
// Compute basic block weights.
if (!ProfileIsValid)
return false;
+ // Compute the total number of samples collected in this profile.
+ for (const auto &I : Reader->getProfiles())
+ TotalCollectedSamples += I.second.getTotalSamples();
+
bool retval = false;
for (auto &F : M)
if (!F.isDeclaration()) {
--- /dev/null
+; RUN: opt %s -sample-profile -sample-profile-file=%S/Inputs/inline-hint.prof -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+;
+; CHECK: Applied cold hint to globally cold function '_Z7cold_fnRxi' with 0.1
+define void @_Z7cold_fnRxi() !dbg !4 { ; !4 is the DISubprogram for cold_fn (declared at source line 3)
+entry:
+ ret void, !dbg !29 ; !29 locates this return at line 5, column 1 within !4
+}
+
+; CHECK: Applied inline hint to globally hot function '_Z6hot_fnRxi' with 70.0
+define void @_Z6hot_fnRxi() #0 !dbg !10 { ; !10 is the DISubprogram for hot_fn (source line 7). NOTE(review): no definition of attribute group #0 is visible here -- confirm
+entry:
+ ret void, !dbg !38 ; !38 locates this return at line 9, column 1 within !10
+}
+
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19}
+
+!1 = !DIFile(filename: "inline-hint.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !10, !11, !14}
+!4 = distinct !DISubprogram(name: "cold_fn", linkageName: "_Z7cold_fnRxi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7, !9}
+!7 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !8, size: 64, align: 64)
+!8 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = distinct !DISubprogram(name: "hot_fn", linkageName: "_Z6hot_fnRxi", scope: !1, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!11 = distinct !DISubprogram(name: "compute", linkageName: "_Z7computex", scope: !1, file: !1, line: 11, type: !12, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!12 = !DISubroutineType(types: !13)
+!13 = !{!8, !8}
+!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 21, type: !15, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!15 = !DISubroutineType(types: !16)
+!16 = !{!9}
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 2, !"Debug Info Version", i32 3}
+!19 = !{!"clang version 3.8.0 (trunk 254067) (llvm/trunk 254079)"}
+!29 = !DILocation(line: 5, column: 1, scope: !4)
+!38 = !DILocation(line: 9, column: 1, scope: !10)