//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DataStructure.h"
-#include "llvm/Analysis/DSGraph.h"
#include "llvm/Module.h"
#include "llvm/DerivedTypes.h"
+#include "Support/Debug.h"
#include "Support/Statistic.h"
+#include "DSCallSiteIterator.h"
namespace {
RegisterAnalysis<TDDataStructures> // Register the pass
- Y("tddatastructure", "Top-down Data Structure Analysis Closure");
+ Y("tddatastructure", "Top-down Data Structure Analysis");
+
+ Statistic<> NumTDInlines("tddatastructures", "Number of graphs inlined");
+}
+
+/// FunctionHasCompleteArguments - This function returns true if it is safe not
+/// to mark arguments to the function complete.
+///
+/// FIXME: Need to check if all callers have been found, or rather if a
+/// funcpointer escapes!
+///
+static bool FunctionHasCompleteArguments(Function &F) {
+ return F.hasInternalLinkage();
}
// run - Calculate the top down data structure graphs for each function in the
//
bool TDDataStructures::run(Module &M) {
BUDataStructures &BU = getAnalysis<BUDataStructures>();
- GlobalsGraph = new DSGraph();
+ GlobalsGraph = new DSGraph(BU.getGlobalsGraph());
+
+ // Figure out which functions must not mark their arguments complete because
+ // they are accessible outside this compilation unit.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!FunctionHasCompleteArguments(*I))
+ ArgsRemainIncomplete.insert(I);
+
+ // We want to traverse the call graph in reverse post-order. To do this, we
+ // calculate a post-order traversal, then reverse it.
+ hash_set<DSGraph*> VisitedGraph;
+ std::vector<DSGraph*> PostOrder;
+ const BUDataStructures::ActualCalleesTy &ActualCallees =
+ getAnalysis<BUDataStructures>().getActualCallees();
// Calculate top-down from main...
if (Function *F = M.getMainFunction())
- calculateGraph(*F);
+ ComputePostOrder(*F, VisitedGraph, PostOrder, ActualCallees);
+
+ // Next calculate the graphs for each unreachable function...
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ ComputePostOrder(*I, VisitedGraph, PostOrder, ActualCallees);
- // Next calculate the graphs for each function unreachable function...
- for (Module::reverse_iterator I = M.rbegin(), E = M.rend(); I != E; ++I)
- if (!I->isExternal())
- calculateGraph(*I);
+ VisitedGraph.clear(); // Release memory!
+
+ // Visit each of the graphs in reverse post-order now!
+ while (!PostOrder.empty()) {
+ inlineGraphIntoCallees(*PostOrder.back());
+ PostOrder.pop_back();
+ }
- GraphDone.clear(); // Free temporary memory...
+ ArgsRemainIncomplete.clear();
return false;
}
+
+DSGraph &TDDataStructures::getOrCreateDSGraph(Function &F) {
+ DSGraph *&G = DSInfo[&F];
+ if (G == 0) { // Not created yet? Clone BU graph...
+ G = new DSGraph(getAnalysis<BUDataStructures>().getDSGraph(F));
+ G->getAuxFunctionCalls().clear();
+ G->setPrintAuxCalls();
+ G->setGlobalsGraph(GlobalsGraph);
+ }
+ return *G;
+}
+
+
+void TDDataStructures::ComputePostOrder(Function &F,hash_set<DSGraph*> &Visited,
+ std::vector<DSGraph*> &PostOrder,
+ const BUDataStructures::ActualCalleesTy &ActualCallees) {
+ if (F.isExternal()) return;
+ DSGraph &G = getOrCreateDSGraph(F);
+ if (Visited.count(&G)) return;
+ Visited.insert(&G);
+
+ // Recursively traverse all of the callee graphs.
+ const std::vector<DSCallSite> &FunctionCalls = G.getFunctionCalls();
+
+ for (unsigned i = 0, e = FunctionCalls.size(); i != e; ++i) {
+ Instruction *CallI = FunctionCalls[i].getCallSite().getInstruction();
+ std::pair<BUDataStructures::ActualCalleesTy::const_iterator,
+ BUDataStructures::ActualCalleesTy::const_iterator>
+ IP = ActualCallees.equal_range(CallI);
+
+ for (BUDataStructures::ActualCalleesTy::const_iterator I = IP.first;
+ I != IP.second; ++I)
+ ComputePostOrder(*I->second, Visited, PostOrder, ActualCallees);
+ }
+
+ PostOrder.push_back(&G);
+}
+
+
+
+
+
// releaseMemory - If the pass pipeline is done with this pass, we can release
// our memory... here...
//
// has no way to extend the lifetime of the pass, which screws up ds-aa.
//
void TDDataStructures::releaseMyMemory() {
- for (hash_map<const Function*, DSGraph*>::iterator I = DSInfo.begin(),
- E = DSInfo.end(); I != E; ++I)
- delete I->second;
+ for (hash_map<Function*, DSGraph*>::iterator I = DSInfo.begin(),
+ E = DSInfo.end(); I != E; ++I) {
+ I->second->getReturnNodes().erase(I->first);
+ if (I->second->getReturnNodes().empty())
+ delete I->second;
+ }
// Empty map so next time memory is released, data structures are not
// re-deleted.
GlobalsGraph = 0;
}
-/// ResolveCallSite - This method is used to link the actual arguments together
-/// with the formal arguments for a function call in the top-down closure. This
-/// method assumes that the call site arguments have been mapped into nodes
-/// local to the specified graph.
-///
-void TDDataStructures::ResolveCallSite(DSGraph &Graph,
- const DSCallSite &CallSite) {
- // Resolve all of the function formal arguments...
- Function &F = Graph.getFunction();
- Function::aiterator AI = F.abegin();
-
- for (unsigned i = 0, e = CallSite.getNumPtrArgs(); i != e; ++i, ++AI) {
- // Advance the argument iterator to the first pointer argument...
- while (!DS::isPointerType(AI->getType())) ++AI;
-
- // TD ...Merge the formal arg scalar with the actual arg node
- DSNodeHandle &NodeForFormal = Graph.getNodeForValue(AI);
- assert(NodeForFormal.getNode() && "Pointer argument has no dest node!");
- NodeForFormal.mergeWith(CallSite.getPtrArg(i));
- }
-
- // Merge returned node in the caller with the "return" node in callee
- if (CallSite.getRetVal().getNode() && Graph.getRetNode().getNode())
- Graph.getRetNode().mergeWith(CallSite.getRetVal());
-}
+void TDDataStructures::inlineGraphIntoCallees(DSGraph &Graph) {
+ // Recompute the Incomplete markers and eliminate unreachable nodes.
+ Graph.removeTriviallyDeadNodes();
+ Graph.maskIncompleteMarkers();
-DSGraph &TDDataStructures::getOrCreateDSGraph(Function &F) {
- DSGraph *&G = DSInfo[&F];
- if (G == 0) { // Not created yet? Clone BU graph...
- G = new DSGraph(getAnalysis<BUDataStructures>().getDSGraph(F));
- G->getAuxFunctionCalls().clear();
- G->setPrintAuxCalls();
- G->setGlobalsGraph(GlobalsGraph);
+ // If any of the functions has incomplete incoming arguments, don't mark any
+ // of them as complete.
+ bool HasIncompleteArgs = false;
+ const DSGraph::ReturnNodesTy &GraphReturnNodes = Graph.getReturnNodes();
+ for (DSGraph::ReturnNodesTy::const_iterator I = GraphReturnNodes.begin(),
+ E = GraphReturnNodes.end(); I != E; ++I)
+ if (ArgsRemainIncomplete.count(I->first)) {
+ HasIncompleteArgs = true;
+ break;
+ }
+
+ // Now fold in the necessary globals from the GlobalsGraph. A global G
+ // must be folded in if it exists in the current graph (i.e., is not dead)
+ // and it was not inlined from any of my callers. If it was inlined from
+ // a caller, it would have been fully consistent with the GlobalsGraph
+ // in the caller so folding in is not necessary. Otherwise, this node came
+ // solely from this function's BU graph and so has to be made consistent.
+ //
+ Graph.updateFromGlobalGraph();
+
+ // Recompute the Incomplete markers. Depends on whether args are complete
+ unsigned Flags
+ = HasIncompleteArgs ? DSGraph::MarkFormalArgs : DSGraph::IgnoreFormalArgs;
+ Graph.markIncompleteNodes(Flags | DSGraph::IgnoreGlobals);
+
+ // Delete dead nodes. Treat globals that are unreachable as dead also.
+ Graph.removeDeadNodes(DSGraph::RemoveUnreachableGlobals);
+
+ // We are done with computing the current TD Graph! Now move on to
+ // inlining the current graph into the graphs for its callees, if any.
+ //
+ const std::vector<DSCallSite> &FunctionCalls = Graph.getFunctionCalls();
+ if (FunctionCalls.empty()) {
+ DEBUG(std::cerr << " [TD] No callees for: " << Graph.getFunctionNames()
+ << "\n");
+ return;
}
- return *G;
-}
-void TDDataStructures::calculateGraph(Function &F) {
- // Make sure this graph has not already been calculated, and that we don't get
- // into an infinite loop with mutually recursive functions.
+ // Now that we have information about all of the callees, propagate the
+ // current graph into the callees. Clone only the reachable subgraph at
+ // each call-site, not the entire graph (even though the entire graph
+ // would be cloned only once, this should still be better on average).
//
- if (GraphDone.count(&F)) return;
- GraphDone.insert(&F);
+ DEBUG(std::cerr << " [TD] Inlining '" << Graph.getFunctionNames() <<"' into "
+ << FunctionCalls.size() << " call nodes.\n");
- // Get the current functions graph...
- DSGraph &Graph = getOrCreateDSGraph(F);
+ const BUDataStructures::ActualCalleesTy &ActualCallees =
+ getAnalysis<BUDataStructures>().getActualCallees();
- // Recompute the Incomplete markers and eliminate unreachable nodes.
- Graph.maskIncompleteMarkers();
- // FIXME: Need to check if all callers have been found, or rather if a
- // funcpointer escapes!
- unsigned Flags = F.hasInternalLinkage() ?
- DSGraph::IgnoreFormalArgs : DSGraph::MarkFormalArgs;
- Graph.markIncompleteNodes(Flags | DSGraph::IgnoreGlobals);
- Graph.removeDeadNodes(DSGraph::RemoveUnreachableGlobals);
+ // Loop over all the call sites and all the callees at each call site.
+ // Clone and merge the reachable subgraph from the call into callee's graph.
+ //
+ for (unsigned i = 0, e = FunctionCalls.size(); i != e; ++i) {
+ Instruction *CallI = FunctionCalls[i].getCallSite().getInstruction();
+ // For each function in the invoked function list at this call site...
+ std::pair<BUDataStructures::ActualCalleesTy::const_iterator,
+ BUDataStructures::ActualCalleesTy::const_iterator>
+ IP = ActualCallees.equal_range(CallI);
- const std::vector<DSCallSite> &CallSites = Graph.getFunctionCalls();
- if (CallSites.empty()) {
- DEBUG(std::cerr << " [TD] No callees for: " << F.getName() << "\n");
- } else {
- // Loop over all of the call sites, building a multi-map from Callees to
- // DSCallSite*'s. With this map we can then loop over each callee, cloning
- // this graph once into it, then resolving arguments.
- //
- std::multimap<Function*, const DSCallSite*> CalleeSites;
- for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
- const DSCallSite &CS = CallSites[i];
- if (CS.isDirectCall()) {
- if (!CS.getCalleeFunc()->isExternal()) // If it's not external
- CalleeSites.insert(std::make_pair(CS.getCalleeFunc(), &CS));// Keep it
- } else {
- const std::vector<GlobalValue*> &Callees =
- CS.getCalleeNode()->getGlobals();
-
- // Loop over all of the functions that this call may invoke...
- for (unsigned c = 0, e = Callees.size(); c != e; ++c)
- if (Function *F = dyn_cast<Function>(Callees[c]))// If this is a fn...
- if (!F->isExternal()) // If it's not extern
- CalleeSites.insert(std::make_pair(F, &CS)); // Keep track of it!
- }
- }
+ // Multiple callees may have the same graph, so try to inline and merge
+ // only once for each <callSite,calleeGraph> pair, not once for each
+ // <callSite,calleeFunction> pair; the latter will be correct but slower.
+ hash_set<DSGraph*> GraphsSeen;
- // Now that we have information about all of the callees, propagate the
- // current graph into the callees.
- //
- DEBUG(std::cerr << " [TD] Inlining '" << F.getName() << "' into "
- << CalleeSites.size() << " callees.\n");
-
- // Loop over all the callees...
- for (std::multimap<Function*, const DSCallSite*>::iterator
- I = CalleeSites.begin(), E = CalleeSites.end(); I != E; )
- if (I->first == &F) { // Bottom-up pass takes care of self loops!
- ++I;
- } else {
- // For each callee...
- Function *Callee = I->first;
- DSGraph &CG = getOrCreateDSGraph(*Callee); // Get the callee's graph...
-
- DEBUG(std::cerr << "\t [TD] Inlining into callee '" << Callee->getName()
- << "'\n");
-
- // Clone our current graph into the callee...
- hash_map<Value*, DSNodeHandle> OldValMap;
- hash_map<const DSNode*, DSNodeHandle> OldNodeMap;
- CG.cloneInto(Graph, OldValMap, OldNodeMap,
- DSGraph::StripModRefBits |
- DSGraph::KeepAllocaBit | DSGraph::DontCloneCallNodes |
- DSGraph::DontCloneAuxCallNodes);
- OldValMap.clear(); // We don't care about the ValMap
-
- // Loop over all of the invocation sites of the callee, resolving
- // arguments to our graph. This loop may iterate multiple times if the
- // current function calls this callee multiple times with different
- // signatures.
- //
- for (; I != E && I->first == Callee; ++I) {
- // Map call site into callee graph
- DSCallSite NewCS(*I->second, OldNodeMap);
-
- // Resolve the return values...
- NewCS.getRetVal().mergeWith(CG.getRetNode());
-
- // Resolve all of the arguments...
- Function::aiterator AI = Callee->abegin();
- for (unsigned i = 0, e = NewCS.getNumPtrArgs();
- i != e && AI != Callee->aend(); ++i, ++AI) {
- // Advance the argument iterator to the first pointer argument...
- while (AI != Callee->aend() && !DS::isPointerType(AI->getType()))
- ++AI;
- if (AI == Callee->aend()) break;
-
- // Add the link from the argument scalar to the provided value
- DSNodeHandle &NH = CG.getNodeForValue(AI);
- assert(NH.getNode() && "Pointer argument without scalarmap entry?");
- NH.mergeWith(NewCS.getPtrArg(i));
- }
+ // Loop over each actual callee at this call site
+ for (BUDataStructures::ActualCalleesTy::const_iterator I = IP.first;
+ I != IP.second; ++I) {
+ DSGraph& CalleeGraph = getDSGraph(*I->second);
+ assert(&CalleeGraph != &Graph && "TD need not inline graph into self!");
+
+ // if this callee graph is already done at this site, skip this callee
+ if (GraphsSeen.find(&CalleeGraph) != GraphsSeen.end())
+ continue;
+ GraphsSeen.insert(&CalleeGraph);
+
+ // Get the root nodes for cloning the reachable subgraph into each callee:
+ // -- all global nodes that appear in both the caller and the callee
+ // -- return value at this call site, if any
+ // -- actual arguments passed at this call site
+ // -- callee node at this call site, if this is an indirect call (this may
+ // not be needed for merging, but allows us to create CS and therefore
+ // simplify the merging below).
+ hash_set<const DSNode*> RootNodeSet;
+ for (DSGraph::ScalarMapTy::const_iterator
+ SI = CalleeGraph.getScalarMap().begin(),
+ SE = CalleeGraph.getScalarMap().end(); SI != SE; ++SI)
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(SI->first)) {
+ DSGraph::ScalarMapTy::const_iterator GI=Graph.getScalarMap().find(GV);
+ if (GI != Graph.getScalarMap().end())
+ RootNodeSet.insert(GI->second.getNode());
}
- // Done with the nodemap...
- OldNodeMap.clear();
-
- // Recompute the Incomplete markers and eliminate unreachable nodes.
- CG.removeTriviallyDeadNodes();
- CG.maskIncompleteMarkers();
- CG.markIncompleteNodes(DSGraph::MarkFormalArgs |DSGraph::IgnoreGlobals);
- CG.removeDeadNodes(DSGraph::RemoveUnreachableGlobals);
- }
-
- DEBUG(std::cerr << " [TD] Done inlining into callees for: " << F.getName()
- << " [" << Graph.getGraphSize() << "+"
- << Graph.getFunctionCalls().size() << "]\n");
-
- // Loop over all the callees... making sure they are all resolved now...
- Function *LastFunc = 0;
- for (std::multimap<Function*, const DSCallSite*>::iterator
- I = CalleeSites.begin(), E = CalleeSites.end(); I != E; ++I)
- if (I->first != LastFunc) { // Only visit each callee once...
- LastFunc = I->first;
- calculateGraph(*I->first);
- }
+ if (const DSNode* RetNode = FunctionCalls[i].getRetVal().getNode())
+ RootNodeSet.insert(RetNode);
+
+ for (unsigned j=0, N=FunctionCalls[i].getNumPtrArgs(); j < N; ++j)
+ if (const DSNode* ArgTarget = FunctionCalls[i].getPtrArg(j).getNode())
+ RootNodeSet.insert(ArgTarget);
+
+ if (FunctionCalls[i].isIndirectCall())
+ RootNodeSet.insert(FunctionCalls[i].getCalleeNode());
+
+ DEBUG(std::cerr << " [TD] Resolving arguments for callee graph '"
+ << CalleeGraph.getFunctionNames()
+ << "': " << I->second->getFunctionType()->getNumParams()
+ << " args\n at call site (DSCallSite*) 0x"
+ << &FunctionCalls[i] << "\n");
+
+ DSGraph::NodeMapTy NodeMapInCallee; // map from nodes to clones in callee
+ DSGraph::NodeMapTy CompletedMap; // unused map for nodes not to do
+ CalleeGraph.cloneReachableSubgraph(Graph, RootNodeSet,
+ NodeMapInCallee, CompletedMap,
+ DSGraph::StripModRefBits |
+ DSGraph::KeepAllocaBit);
+
+ // Transform our call site info into the cloned version for CalleeGraph
+ DSCallSite CS(FunctionCalls[i], NodeMapInCallee);
+
+ // Get the formal argument and return nodes for the called function
+ // and merge them with the cloned subgraph. Global nodes were merged
+ // already by cloneReachableSubgraph() above.
+ CalleeGraph.getCallSiteForArguments(*I->second).mergeWith(CS);
+
+ ++NumTDInlines;
+ }
}
+
+ DEBUG(std::cerr << " [TD] Done inlining into callees for: "
+ << Graph.getFunctionNames() << " [" << Graph.getGraphSize() << "+"
+ << Graph.getFunctionCalls().size() << "]\n");
}