From: Chandler Carruth Date: Fri, 8 Jan 2016 10:55:52 +0000 (+0000) Subject: [attrs] Split the late-revisit pattern for deducing norecurse in X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=e96fb9ab15d4c9c20f2427190274339f38846945;p=oota-llvm.git [attrs] Split the late-revisit pattern for deducing norecurse in a top-down manner into a true top-down or RPO pass over the call graph. There are specific patterns of function attributes, notably the norecurse attribute, which are most effectively propagated top-down because all they us caller information. Walk in RPO over the call graph SCCs takes the form of a module pass run immediately after the CGSCC pass managers postorder walk of the SCCs, trying again to deduce norerucrse for each singular SCC in the call graph. This removes a very legacy pass manager specific trick of using a lazy revisit list traversed during finalization of the CGSCC pass. There is no analogous finalization step in the new pass manager, and a lazy revisit list is just trying to produce an RPO iteration of the call graph. We can do that more directly if more expensively. It seems unlikely that this will be the expensive part of any compilation though as we never examine the function bodies here. Even in an LTO run over a very large module, this should be a reasonable fast set of operations over a reasonably small working set -- the function call graph itself. In the future, if this really is a compile time performance issue, we can look at building support for both post order and RPO traversals directly into a pass manager that builds and maintains the PO list of SCCs. Differential Revision: http://reviews.llvm.org/D15785 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257163 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index cb2b1394e92..90fbc1d891b 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -132,7 +132,6 @@ void initializeEarlyCSELegacyPassPass(PassRegistry &); void initializeEliminateAvailableExternallyPass(PassRegistry&); void initializeExpandISelPseudosPass(PassRegistry&); void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&); -void initializeFunctionAttrsPass(PassRegistry&); void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGVNPass(PassRegistry&); @@ -227,6 +226,7 @@ void initializePostDomOnlyViewerPass(PassRegistry&); void initializePostDomPrinterPass(PassRegistry&); void initializePostDomViewerPass(PassRegistry&); void initializePostDominatorTreePass(PassRegistry&); +void initializePostOrderFunctionAttrsPass(PassRegistry&); void initializePostRASchedulerPass(PassRegistry&); void initializePostMachineSchedulerPass(PassRegistry&); void initializePrintFunctionPassWrapperPass(PassRegistry&); @@ -242,6 +242,7 @@ void initializeRegionOnlyPrinterPass(PassRegistry&); void initializeRegionOnlyViewerPass(PassRegistry&); void initializeRegionPrinterPass(PassRegistry&); void initializeRegionViewerPass(PassRegistry&); +void initializeReversePostOrderFunctionAttrsPass(PassRegistry&); void initializeRewriteStatepointsForGCPass(PassRegistry&); void initializeSafeStackPass(PassRegistry&); void initializeSCCPPass(PassRegistry&); diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 29fcd93a2a1..d695d11a636 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -157,7 +157,8 @@ namespace { (void) llvm::createPostDomTree(); (void) llvm::createInstructionNamerPass(); (void) llvm::createMetaRenamerPass(); - (void) llvm::createFunctionAttrsPass(); + (void) llvm::createPostOrderFunctionAttrsPass(); + (void) llvm::createReversePostOrderFunctionAttrsPass(); (void) llvm::createMergeFunctionsPass(); (void) llvm::createPrintModulePass(*(llvm::raw_ostream*)nullptr); (void) llvm::createPrintFunctionPass(*(llvm::raw_ostream*)nullptr); diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 0c374a070ce..78d2fadc519 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -183,12 +183,20 @@ ModulePass *createBlockExtractorPass(); ModulePass *createStripDeadPrototypesPass(); //===----------------------------------------------------------------------===// -/// createFunctionAttrsPass - This pass discovers functions that do not access -/// memory, or only read memory, and gives them the readnone/readonly attribute. -/// It also discovers function arguments that are not captured by the function -/// and marks them with the nocapture attribute. +/// createPostOrderFunctionAttrsPass - This pass walks SCCs of the call graph +/// in post-order to deduce and propagate function attributes. It can discover +/// functions that do not access memory, or only read memory, and give them the +/// readnone/readonly attribute. It also discovers function arguments that are +/// not captured by the function and marks them with the nocapture attribute. /// -Pass *createFunctionAttrsPass(); +Pass *createPostOrderFunctionAttrsPass(); + +//===----------------------------------------------------------------------===// +/// createReversePostOrderFunctionAttrsPass - This pass walks SCCs of the call +/// graph in RPO to deduce and propagate function attributes. Currently it +/// only handles synthesizing norecurse attributes. +/// +Pass *createReversePostOrderFunctionAttrsPass(); //===----------------------------------------------------------------------===// /// createMergeFunctionsPass - This pass discovers identical functions and diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 6baaaa4b139..66df23bab1b 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -92,7 +92,8 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeSROALegacyPassPass(R); initializeSROA_DTPass(R); initializeSROA_SSAUpPass(R); - initializeFunctionAttrsPass(R); + initializePostOrderFunctionAttrsPass(R); + initializeReversePostOrderFunctionAttrsPass(R); initializeGlobalsAAWrapperPassPass(R); initializeLICMPass(R); initializeMergedLoadStoreMotionPass(R); diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 6dcfb3f8300..527fdd1885a 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -6,16 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file implements a simple interprocedural pass which walks the -// call-graph, looking for functions which do not access or only read -// non-local memory, and marking them readnone/readonly. It does the -// same with function arguments independently, marking them readonly/ -// readnone/nocapture. Finally, well-known library call declarations -// are marked with all attributes that are consistent with the -// function's standard definition. This pass is implemented as a -// bottom-up traversal of the call-graph. -// +/// +/// \file +/// This file implements interprocedural passes which walk the +/// call-graph deducing and/or propagating function attributes. +/// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO.h" @@ -57,19 +52,14 @@ typedef SmallSetVector SCCNodeSet; } namespace { -struct FunctionAttrs : public CallGraphSCCPass { +struct PostOrderFunctionAttrs : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID) { - initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); + PostOrderFunctionAttrs() : CallGraphSCCPass(ID) { + initializePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry()); } bool runOnSCC(CallGraphSCC &SCC) override; - bool doInitialization(CallGraph &CG) override { - Revisit.clear(); - return false; - } - bool doFinalization(CallGraph &CG) override; - + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); @@ -79,20 +69,19 @@ struct FunctionAttrs : public CallGraphSCCPass { private: TargetLibraryInfo *TLI; - SmallVector Revisit; }; } -char FunctionAttrs::ID = 0; -INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", +char PostOrderFunctionAttrs::ID = 0; +INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrs, "functionattrs", "Deduce function attributes", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", +INITIALIZE_PASS_END(PostOrderFunctionAttrs, "functionattrs", "Deduce function attributes", false, false) -Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } +Pass *llvm::createPostOrderFunctionAttrsPass() { return new PostOrderFunctionAttrs(); } namespace { /// The three kinds of memory access relevant to 'readonly' and @@ -949,8 +938,7 @@ static bool setDoesNotRecurse(Function &F) { return true; } -static bool addNoRecurseAttrs(const CallGraphSCC &SCC, - SmallVectorImpl &Revisit) { +static bool addNoRecurseAttrs(const CallGraphSCC &SCC) { // Try and identify functions that do not recurse. // If the SCC contains multiple nodes we know for sure there is recursion. @@ -973,32 +961,11 @@ static bool addNoRecurseAttrs(const CallGraphSCC &SCC, // Function calls a potentially recursive function. return setDoesNotRecurse(*F); - // We know that F is not obviously recursive, but we haven't been able to - // prove that it doesn't actually recurse. Add it to the Revisit list to try - // again top-down later. - Revisit.push_back(F); + // Nothing else we can deduce usefully during the postorder traversal. return false; } -static bool addNoRecurseAttrsTopDownOnly(Function *F) { - // If F is internal and all uses are in norecurse functions, then F is also - // norecurse. - if (F->doesNotRecurse()) - return false; - if (F->hasInternalLinkage()) { - for (auto *U : F->users()) - if (auto *I = dyn_cast(U)) { - if (!I->getParent()->getParent()->doesNotRecurse()) - return false; - } else { - return false; - } - return setDoesNotRecurse(*F); - } - return false; -} - -bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { +bool PostOrderFunctionAttrs::runOnSCC(CallGraphSCC &SCC) { TLI = &getAnalysis().getTLI(); bool Changed = false; @@ -1040,19 +1007,100 @@ bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { Changed |= addNoAliasAttrs(SCCNodes); Changed |= addNonNullAttrs(SCCNodes, *TLI); } - - Changed |= addNoRecurseAttrs(SCC, Revisit); + + Changed |= addNoRecurseAttrs(SCC); return Changed; } -bool FunctionAttrs::doFinalization(CallGraph &CG) { +namespace { +/// A pass to do RPO deduction and propagation of function attributes. +/// +/// This pass provides a general RPO or "top down" propagation of +/// function attributes. For a few (rare) cases, we can deduce significantly +/// more about function attributes by working in RPO, so this pass +/// provides the compliment to the post-order pass above where the majority of +/// deduction is performed. +// FIXME: Currently there is no RPO CGSCC pass structure to slide into and so +// this is a boring module pass, but eventually it should be an RPO CGSCC pass +// when such infrastructure is available. +struct ReversePostOrderFunctionAttrs : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ReversePostOrderFunctionAttrs() : ModulePass(ID) { + initializeReversePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + } +}; +} + +char ReversePostOrderFunctionAttrs::ID = 0; +INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrs, "rpo-functionattrs", + "Deduce function attributes in RPO", false, false) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_END(ReversePostOrderFunctionAttrs, "rpo-functionattrs", + "Deduce function attributes in RPO", false, false) + +Pass *llvm::createReversePostOrderFunctionAttrsPass() { + return new ReversePostOrderFunctionAttrs(); +} + +static bool addNoRecurseAttrsTopDown(Function &F) { + // We check the preconditions for the function prior to calling this to avoid + // the cost of building up a reversible post-order list. We assert them here + // to make sure none of the invariants this relies on were violated. + assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!"); + assert(!F.doesNotRecurse() && + "This function has already been deduced as norecurs!"); + assert(F.hasInternalLinkage() && + "Can only do top-down deduction for internal linkage functions!"); + + // If F is internal and all of its uses are calls from a non-recursive + // functions, then none of its calls could in fact recurse without going + // through a function marked norecurse, and so we can mark this function too + // as norecurse. Note that the uses must actually be calls -- otherwise + // a pointer to this function could be returned from a norecurse function but + // this function could be recursively (indirectly) called. Note that this + // also detects if F is directly recursive as F is not yet marked as + // a norecurse function. + for (auto *U : F.users()) { + auto *I = dyn_cast(U); + if (!I) + return false; + CallSite CS(I); + if (!CS || !CS.getParent()->getParent()->doesNotRecurse()) + return false; + } + return setDoesNotRecurse(F); +} + +bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) { + // We only have a post-order SCC traversal (because SCCs are inherently + // discovered in post-order), so we accumulate them in a vector and then walk + // it in reverse. This is simpler than using the RPO iterator infrastructure + // because we need to combine SCC detection and the PO walk of the call + // graph. We can also cheat egregiously because we're primarily interested in + // synthesizing norecurse and so we can only save the singular SCCs as SCCs + // with multiple functions in them will clearly be recursive. + auto &CG = getAnalysis().getCallGraph(); + SmallVector Worklist; + for (scc_iterator I = scc_begin(&CG); !I.isAtEnd(); ++I) { + if (I->size() != 1) + continue; + + Function *F = I->front()->getFunction(); + if (F && !F->isDeclaration() && !F->doesNotRecurse() && + F->hasInternalLinkage()) + Worklist.push_back(F); + } + bool Changed = false; - // When iterating over SCCs we visit functions in a bottom-up fashion. Some of - // the rules we have for identifying norecurse functions work best with a - // top-down walk, so look again at all the functions we previously marked as - // worth revisiting, in top-down order. - for (auto &F : reverse(Revisit)) - if (F) - Changed |= addNoRecurseAttrsTopDownOnly(cast((Value*)F)); + for (auto *F : reverse(Worklist)) + Changed |= addNoRecurseAttrsTopDown(*F); + return Changed; } diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 7ea6c08b2e6..89629cf06e0 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -28,7 +28,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeDAEPass(Registry); initializeDAHPass(Registry); initializeForceFunctionAttrsLegacyPassPass(Registry); - initializeFunctionAttrsPass(Registry); initializeGlobalDCEPass(Registry); initializeGlobalOptPass(Registry); initializeIPCPPass(Registry); @@ -42,6 +41,8 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeLowerBitSetsPass(Registry); initializeMergeFunctionsPass(Registry); initializePartialInlinerPass(Registry); + initializePostOrderFunctionAttrsPass(Registry); + initializeReversePostOrderFunctionAttrsPass(Registry); initializePruneEHPass(Registry); initializeStripDeadPrototypesLegacyPassPass(Registry); initializeStripSymbolsPass(Registry); @@ -71,7 +72,7 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) { } void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createFunctionAttrsPass()); + unwrap(PM)->add(createPostOrderFunctionAttrsPass()); } void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) { diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 9876efa7b23..faada9c2a7d 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -251,7 +251,7 @@ void PassManagerBuilder::populateModulePassManager( Inliner = nullptr; } if (!DisableUnitAtATime) - MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs + MPM.add(createPostOrderFunctionAttrsPass()); if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args @@ -346,6 +346,9 @@ void PassManagerBuilder::populateModulePassManager( // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (!DisableUnitAtATime) + MPM.add(createReversePostOrderFunctionAttrsPass()); + if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve @@ -502,7 +505,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createIPSCCPPass()); // Now that we internalized some globals, see if we can hack on them! - PM.add(createFunctionAttrsPass()); // Add norecurse if possible. + PM.add(createPostOrderFunctionAttrsPass()); + PM.add(createReversePostOrderFunctionAttrsPass()); PM.add(createGlobalOptimizerPass()); // Promote any localized global vars. PM.add(createPromoteMemoryToRegisterPass()); @@ -551,7 +555,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createScalarReplAggregatesPass()); // Run a few AA driven optimizations here and now, to cleanup the code. - PM.add(createFunctionAttrsPass()); // Add nocapture. + PM.add(createPostOrderFunctionAttrsPass()); // Add nocapture. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. diff --git a/test/Transforms/FunctionAttrs/norecurse.ll b/test/Transforms/FunctionAttrs/norecurse.ll index 47481191d27..d5a2d820840 100644 --- a/test/Transforms/FunctionAttrs/norecurse.ll +++ b/test/Transforms/FunctionAttrs/norecurse.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s +; RUN: opt < %s -basicaa -functionattrs -rpo-functionattrs -S | FileCheck %s ; CHECK: define i32 @leaf() #0 define i32 @leaf() {