// Code generation pass that transforms code to identify where Cilk keywords
// should be inserted. This relies on `llvm-dis -c' to print out the keywords.
//----------------------------------------------------------------------------
-
-
-class Cilkifier: public InstVisitor<Cilkifier>
-{
+// Instruction visitor that rewrites a function in place, marking Cilk sync
+// points with calls to a dummy "sync" function created in the constructor.
+class Cilkifier: public InstVisitor<Cilkifier> {
  Function* DummySyncFunc;
  // Data used when transforming each function.
};
-Cilkifier::Cilkifier(Module& M)
-{
+Cilkifier::Cilkifier(Module& M) {
  // create the dummy Sync function and add it to the Module
  DummySyncFunc = M.getOrInsertFunction(DummySyncFuncName, Type::VoidTy, 0);
}
void Cilkifier::TransformFunc(Function* F,
                              const hash_set<Function*>& _cilkFunctions,
-                              PgmDependenceGraph& _depGraph)
-{
+                              PgmDependenceGraph& _depGraph) {
  // Memoize the information for this function
  cilkFunctions = &_cilkFunctions;
  depGraph = &_depGraph;
+  // NOTE(review): the diff appears elided here — the statements from this
+  // point reference I/root/depsOfRoot and belong to the DFS visit routine
+  // (DFSVisitInstr), not to TransformFunc; confirm against the full file.
  stmtsVisited.insert(I);
  // If there is a dependence from root to I, insert Sync and return
-  if (depsOfRoot.find(I) != depsOfRoot.end())
-    { // Insert a sync before I and stop searching along this path.
-      // If I is a Phi instruction, the dependence can only be an SSA dep.
-      // and we need to insert the sync in the predecessor on the appropriate
-      // incoming edge!
-      CallInst* syncI = 0;
-      if (PHINode* phiI = dyn_cast<PHINode>(I))
-        { // check all operands of the Phi and insert before each one
-          for (unsigned i = 0, N = phiI->getNumIncomingValues(); i < N; ++i)
-            if (phiI->getIncomingValue(i) == root)
-              syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "",
-                                   phiI->getIncomingBlock(i)->getTerminator());
-        }
-      else
-        syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "", I);
-
-      // Remember the sync for each spawn to eliminate redundant ones later
-      spawnToSyncsMap[cast<CallInst>(root)].insert(syncI);
+  if (depsOfRoot.find(I) != depsOfRoot.end()) {
+    // Insert a sync before I and stop searching along this path.
+    // If I is a Phi instruction, the dependence can only be an SSA dep.
+    // and we need to insert the sync in the predecessor on the appropriate
+    // incoming edge!
+    CallInst* syncI = 0;
+    if (PHINode* phiI = dyn_cast<PHINode>(I)) {
+      // check all operands of the Phi and insert before each one
+      for (unsigned i = 0, N = phiI->getNumIncomingValues(); i < N; ++i)
+        if (phiI->getIncomingValue(i) == root)
+          syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "",
+                               phiI->getIncomingBlock(i)->getTerminator());
+    } else
+      syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "", I);
+
+    // Remember the sync for each spawn to eliminate redundant ones later
+    spawnToSyncsMap[cast<CallInst>(root)].insert(syncI);
-    return;
-  }
+    return;
+  }
  // else visit unvisited successors
-  if (BranchInst* brI = dyn_cast<BranchInst>(I))
-    { // visit first instruction in each successor BB
-      for (unsigned i = 0, N = brI->getNumSuccessors(); i < N; ++i)
-        if (stmtsVisited.find(&brI->getSuccessor(i)->front())
-            == stmtsVisited.end())
-          DFSVisitInstr(&brI->getSuccessor(i)->front(), root, depsOfRoot);
-    }
-  else
+  if (BranchInst* brI = dyn_cast<BranchInst>(I)) {
+    // visit first instruction in each successor BB
+    for (unsigned i = 0, N = brI->getNumSuccessors(); i < N; ++i)
+      if (stmtsVisited.find(&brI->getSuccessor(i)->front())
+          == stmtsVisited.end())
+        DFSVisitInstr(&brI->getSuccessor(i)->front(), root, depsOfRoot);
+  } else
    if (Instruction* nextI = I->getNext())
      if (stmtsVisited.find(nextI) == stmtsVisited.end())
        DFSVisitInstr(nextI, root, depsOfRoot);
  std::vector<const PHINode*> phiUsers;
  hash_set<const PHINode*> phisSeen; // ensures we don't visit a phi twice
+  // Gather direct (non-Phi) users of CI as dependences; Phi users are
+  // worklisted so their transitive non-Phi users can be added below.
  for (Value::use_iterator UI=CI.use_begin(), UE=CI.use_end(); UI != UE; ++UI)
-    if (const PHINode* phiUser = dyn_cast<PHINode>(*UI))
-      {
-        if (phisSeen.find(phiUser) == phisSeen.end())
-          {
-            phiUsers.push_back(phiUser);
-            phisSeen.insert(phiUser);
-          }
+    if (const PHINode* phiUser = dyn_cast<PHINode>(*UI)) {
+      if (phisSeen.find(phiUser) == phisSeen.end()) {
+        phiUsers.push_back(phiUser);
+        phisSeen.insert(phiUser);
      }
+    }
    else
      depsOfRoot.insert(cast<Instruction>(*UI));
  // Now we've found the non-Phi users and immediate phi users.
  // Recursively walk the phi users and add their non-phi users.
-  for (const PHINode* phiUser; !phiUsers.empty(); phiUsers.pop_back())
-    {
-      phiUser = phiUsers.back();
-      for (Value::use_const_iterator UI=phiUser->use_begin(),
-             UE=phiUser->use_end(); UI != UE; ++UI)
-        if (const PHINode* pn = dyn_cast<PHINode>(*UI))
-          {
-            if (phisSeen.find(pn) == phisSeen.end())
-              {
-                phiUsers.push_back(pn);
-                phisSeen.insert(pn);
-              }
-          }
-        else
-          depsOfRoot.insert(cast<Instruction>(*UI));
-    }
+  for (const PHINode* phiUser; !phiUsers.empty(); phiUsers.pop_back()) {
+    phiUser = phiUsers.back();
+    for (Value::use_const_iterator UI=phiUser->use_begin(),
+           UE=phiUser->use_end(); UI != UE; ++UI)
+      if (const PHINode* pn = dyn_cast<PHINode>(*UI)) {
+        if (phisSeen.find(pn) == phisSeen.end()) {
+          phiUsers.push_back(pn);
+          phisSeen.insert(pn);
+        }
+      } else
+        depsOfRoot.insert(cast<Instruction>(*UI));
+  }
  // Walk paths of the CFG starting at the call instruction and insert
  // one sync before the first dependence on each path, if any.
-  if (! depsOfRoot.empty())
-    {
-      stmtsVisited.clear(); // start a new DFS for this CallInst
-      assert(CI.getNext() && "Call instruction cannot be a terminator!");
-      DFSVisitInstr(CI.getNext(), &CI, depsOfRoot);
-    }
+  if (! depsOfRoot.empty()) {
+    stmtsVisited.clear(); // start a new DFS for this CallInst
+    assert(CI.getNext() && "Call instruction cannot be a terminator!");
+    DFSVisitInstr(CI.getNext(), &CI, depsOfRoot);
+  }
  // Now, eliminate all users of the SSA value of the CallInst, i.e.,
  // if the call instruction returns a value, delete the return value
  // Now we've found all CallInsts reachable from each CallInst.
  // Find those CallInsts that are parallel with at least one other CallInst
  // by counting total inEdges and outEdges.
-  //
  unsigned long totalNumCalls = completed.size();
-  if (totalNumCalls == 1)
-    { // Check first for the special case of a single call instruction not
-      // in any loop. It is not parallel, even if it has no dependences
-      // (this is why it is a special case).
-      //
-      // FIXME:
-      // THIS CASE IS NOT HANDLED RIGHT NOW, I.E., THERE IS NO
-      // PARALLELISM FOR CALLS IN DIFFERENT ITERATIONS OF A LOOP.
-      //
-      return;
-    }
+  if (totalNumCalls == 1) {
+    // Check first for the special case of a single call instruction not
+    // in any loop. It is not parallel, even if it has no dependences
+    // (this is why it is a special case).
+    //
+    // FIXME:
+    // THIS CASE IS NOT HANDLED RIGHT NOW, I.E., THERE IS NO
+    // PARALLELISM FOR CALLS IN DIFFERENT ITERATIONS OF A LOOP.
+    return;
+  }
  hash_map<CallInst*, unsigned long> numDeps;
  for (hash_map<CallInst*, DependentsSet>::iterator II = dependents.begin(),
-         IE = dependents.end(); II != IE; ++II)
-    {
-      CallInst* fromCI = II->first;
-      numDeps[fromCI] += II->second.size();
-      for (Dependents_iterator DI = II->second.begin(), DE = II->second.end();
-           DI != DE; ++DI)
-        numDeps[*DI]++; // *DI can be reached from II->first
-    }
+         IE = dependents.end(); II != IE; ++II) {
+    CallInst* fromCI = II->first;
+    numDeps[fromCI] += II->second.size();
+    for (Dependents_iterator DI = II->second.begin(), DE = II->second.end();
+         DI != DE; ++DI)
+      numDeps[*DI]++; // *DI can be reached from II->first
+  }
  for (hash_map<CallInst*, DependentsSet>::iterator
         II = dependents.begin(), IE = dependents.end(); II != IE; ++II)
+  // NOTE(review): the diff appears elided here — the code from this point
+  // (using I, root, depsOfRoot) belongs to FindParallelCalls::VisitOutEdges,
+  // not to the loop above; confirm against the complete file.
  stmtsVisited.insert(I);
  if (CallInst* CI = dyn_cast<CallInst>(I))
-
    // FIXME: Ignoring parallelism in a loop. Here we're actually *ignoring*
    // a self-dependence in order to get the count comparison right above.
    // When we include loop parallelism, self-dependences should be included.
-    //
-    if (CI != root)
-
-      { // CallInst root has a path to CallInst I and any calls reachable from I
-        depsOfRoot.insert(CI);
-        if (completed[CI])
-          { // We have already visited I so we know all nodes it can reach!
-            DependentsSet& depsOfI = dependents[CI];
-            depsOfRoot.insert(depsOfI.begin(), depsOfI.end());
-            return;
-          }
+    if (CI != root) {
+      // CallInst root has a path to CallInst I and any calls reachable from I
+      depsOfRoot.insert(CI);
+      if (completed[CI]) {
+        // We have already visited I so we know all nodes it can reach!
+        DependentsSet& depsOfI = dependents[CI];
+        depsOfRoot.insert(depsOfI.begin(), depsOfI.end());
+        return;
      }
+    }
  // If we reach here, we need to visit all children of I
  for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(*I);
-       ! DI.fini(); ++DI)
-    {
-      Instruction* sink = &DI->getSink()->getInstr();
-      if (stmtsVisited.find(sink) == stmtsVisited.end())
-        VisitOutEdges(sink, root, depsOfRoot);
-    }
+       ! DI.fini(); ++DI) {
+    Instruction* sink = &DI->getSink()->getInstr();
+    if (stmtsVisited.find(sink) == stmtsVisited.end())
+      VisitOutEdges(sink, root, depsOfRoot);
+  }
}
-void FindParallelCalls::visitCallInst(CallInst& CI)
-{
+// Memoize in dependents[&CI] every call reachable from CI through the
+// dependence graph; completed[&CI] prevents recomputation on revisit.
+void FindParallelCalls::visitCallInst(CallInst& CI) {
  if (completed[&CI])
    return;
  stmtsVisited.clear(); // clear flags to do a fresh DFS
  // Visit all children of CI using a recursive walk through dep graph
  DependentsSet& depsOfRoot = dependents[&CI];
  for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(CI);
-       ! DI.fini(); ++DI)
-    {
-      Instruction* sink = &DI->getSink()->getInstr();
-      if (stmtsVisited.find(sink) == stmtsVisited.end())
-        VisitOutEdges(sink, &CI, depsOfRoot);
-    }
+       ! DI.fini(); ++DI) {
+    Instruction* sink = &DI->getSink()->getInstr();
+    if (stmtsVisited.find(sink) == stmtsVisited.end())
+      VisitOutEdges(sink, &CI, depsOfRoot);
+  }
  completed[&CI] = true;
}
//----------------------------------------------------------------------------
namespace {
-  class Parallelize: public Pass
-  {
+  // Pass driver: finds parallelizable calls per function and Cilkifies the
+  // functions deemed safe (see run() below).
+  class Parallelize: public Pass {
  public:
    /// Driver functions to transform a program
    ///
}
-static Function* FindMain(Module& M)
-{
-  for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
-    if (FI->getName() == std::string("main"))
-      return FI;
-  return NULL;
-}
-
-
-bool Parallelize::run(Module& M)
-{
+bool Parallelize::run(Module& M) {
  hash_set<Function*> parallelFunctions;
  hash_set<Function*> safeParallelFunctions;
  hash_set<const GlobalValue*> indirectlyCalled;
  // If there is no main (i.e., for an incomplete program), we can do nothing.
  // If there is a main, mark main as a parallel function.
-  //
-  Function* mainFunc = FindMain(M);
+  // Module::getMainFunction() supersedes the FindMain() helper deleted above.
+  Function* mainFunc = M.getMainFunction();
  if (!mainFunc)
    return false;
  // (1) Find candidate parallel functions and mark them as Cilk functions
-  //
  for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
-    if (! FI->isExternal())
-      {
-        Function* F = FI;
-        DSGraph& tdg = getAnalysis<TDDataStructures>().getDSGraph(*F);
-
-        // All the hard analysis work gets done here!
-        //
-        FindParallelCalls finder(*F,
-                                getAnalysis<PgmDependenceGraph>().getGraph(*F));
-        /* getAnalysis<MemoryDepAnalysis>().getGraph(*F)); */
-
-        // Now we know which call instructions are useful to parallelize.
-        // Remember those callee functions.
-        //
-        for (std::vector<CallInst*>::iterator
-               CII = finder.parallelCalls.begin(),
-               CIE = finder.parallelCalls.end(); CII != CIE; ++CII)
-          {
-            // Check if this is a direct call...
-            if ((*CII)->getCalledFunction() != NULL)
-              { // direct call: if this is to a non-external function,
-                // mark it as a parallelizable function
-                if (! (*CII)->getCalledFunction()->isExternal())
-                  parallelFunctions.insert((*CII)->getCalledFunction());
-              }
-            else
-              { // Indirect call: mark all potential callees as bad
-                std::vector<GlobalValue*> callees =
-                  tdg.getNodeForValue((*CII)->getCalledValue())
-                  .getNode()->getGlobals();
-                indirectlyCalled.insert(callees.begin(), callees.end());
-              }
-          }
+    if (! FI->isExternal()) {
+      Function* F = FI;
+      DSGraph& tdg = getAnalysis<TDDataStructures>().getDSGraph(*F);
+
+      // All the hard analysis work gets done here!
+      FindParallelCalls finder(*F,
+                              getAnalysis<PgmDependenceGraph>().getGraph(*F));
+      /* getAnalysis<MemoryDepAnalysis>().getGraph(*F)); */
+
+      // Now we know which call instructions are useful to parallelize.
+      // Remember those callee functions.
+      for (std::vector<CallInst*>::iterator
+             CII = finder.parallelCalls.begin(),
+             CIE = finder.parallelCalls.end(); CII != CIE; ++CII) {
+        // Check if this is a direct call...
+        if ((*CII)->getCalledFunction() != NULL) {
+          // direct call: if this is to a non-external function,
+          // mark it as a parallelizable function
+          if (! (*CII)->getCalledFunction()->isExternal())
+            parallelFunctions.insert((*CII)->getCalledFunction());
+        } else {
+          // Indirect call: mark all potential callees as bad
+          std::vector<GlobalValue*> callees =
+            tdg.getNodeForValue((*CII)->getCalledValue())
+            .getNode()->getGlobals();
+          indirectlyCalled.insert(callees.begin(), callees.end());
+        }
+      }
    }
+  }
  // Remove all indirectly called functions from the list of Cilk functions.
-  //
  for (hash_set<Function*>::iterator PFI = parallelFunctions.begin(),
         PFE = parallelFunctions.end(); PFI != PFE; ++PFI)
    if (indirectlyCalled.count(*PFI) == 0)
  // This should identify both functions and calls to such functions
  // to the code generator.
  // (4) Also, insert calls to sync at appropriate points.
-  //
  Cilkifier cilkifier(M);
  for (hash_set<Function*>::iterator CFI = safeParallelFunctions.begin(),
-         CFE = safeParallelFunctions.end(); CFI != CFE; ++CFI)
-    {
-      cilkifier.TransformFunc(*CFI, safeParallelFunctions,
-                              getAnalysis<PgmDependenceGraph>().getGraph(**CFI));
-      /* getAnalysis<MemoryDepAnalysis>().getGraph(**CFI)); */
-    }
+         CFE = safeParallelFunctions.end(); CFI != CFE; ++CFI) {
+    cilkifier.TransformFunc(*CFI, safeParallelFunctions,
+                            getAnalysis<PgmDependenceGraph>().getGraph(**CFI));
+    /* getAnalysis<MemoryDepAnalysis>().getGraph(**CFI)); */
+  }
  return true;
}
// Code generation pass that transforms code to identify where Cilk keywords
// should be inserted. This relies on `llvm-dis -c' to print out the keywords.
//----------------------------------------------------------------------------
-
-
-class Cilkifier: public InstVisitor<Cilkifier>
-{
+// NOTE(review): everything from here down duplicates the first half of this
+// patch verbatim — likely an accidental double paste; consider deduplicating.
+// Instruction visitor that rewrites a function in place, marking Cilk sync
+// points with calls to a dummy "sync" function created in the constructor.
+class Cilkifier: public InstVisitor<Cilkifier> {
  Function* DummySyncFunc;
  // Data used when transforming each function.
};
-Cilkifier::Cilkifier(Module& M)
-{
+Cilkifier::Cilkifier(Module& M) {
  // create the dummy Sync function and add it to the Module
  DummySyncFunc = M.getOrInsertFunction(DummySyncFuncName, Type::VoidTy, 0);
}
void Cilkifier::TransformFunc(Function* F,
                              const hash_set<Function*>& _cilkFunctions,
-                              PgmDependenceGraph& _depGraph)
-{
+                              PgmDependenceGraph& _depGraph) {
  // Memoize the information for this function
  cilkFunctions = &_cilkFunctions;
  depGraph = &_depGraph;
+  // NOTE(review): the diff appears elided here — the statements from this
+  // point reference I/root/depsOfRoot and belong to the DFS visit routine
+  // (DFSVisitInstr), not to TransformFunc; confirm against the full file.
  stmtsVisited.insert(I);
  // If there is a dependence from root to I, insert Sync and return
-  if (depsOfRoot.find(I) != depsOfRoot.end())
-    { // Insert a sync before I and stop searching along this path.
-      // If I is a Phi instruction, the dependence can only be an SSA dep.
-      // and we need to insert the sync in the predecessor on the appropriate
-      // incoming edge!
-      CallInst* syncI = 0;
-      if (PHINode* phiI = dyn_cast<PHINode>(I))
-        { // check all operands of the Phi and insert before each one
-          for (unsigned i = 0, N = phiI->getNumIncomingValues(); i < N; ++i)
-            if (phiI->getIncomingValue(i) == root)
-              syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "",
-                                   phiI->getIncomingBlock(i)->getTerminator());
-        }
-      else
-        syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "", I);
-
-      // Remember the sync for each spawn to eliminate redundant ones later
-      spawnToSyncsMap[cast<CallInst>(root)].insert(syncI);
+  if (depsOfRoot.find(I) != depsOfRoot.end()) {
+    // Insert a sync before I and stop searching along this path.
+    // If I is a Phi instruction, the dependence can only be an SSA dep.
+    // and we need to insert the sync in the predecessor on the appropriate
+    // incoming edge!
+    CallInst* syncI = 0;
+    if (PHINode* phiI = dyn_cast<PHINode>(I)) {
+      // check all operands of the Phi and insert before each one
+      for (unsigned i = 0, N = phiI->getNumIncomingValues(); i < N; ++i)
+        if (phiI->getIncomingValue(i) == root)
+          syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "",
+                               phiI->getIncomingBlock(i)->getTerminator());
+    } else
+      syncI = new CallInst(DummySyncFunc, std::vector<Value*>(), "", I);
+
+    // Remember the sync for each spawn to eliminate redundant ones later
+    spawnToSyncsMap[cast<CallInst>(root)].insert(syncI);
-    return;
-  }
+    return;
+  }
  // else visit unvisited successors
-  if (BranchInst* brI = dyn_cast<BranchInst>(I))
-    { // visit first instruction in each successor BB
-      for (unsigned i = 0, N = brI->getNumSuccessors(); i < N; ++i)
-        if (stmtsVisited.find(&brI->getSuccessor(i)->front())
-            == stmtsVisited.end())
-          DFSVisitInstr(&brI->getSuccessor(i)->front(), root, depsOfRoot);
-    }
-  else
+  if (BranchInst* brI = dyn_cast<BranchInst>(I)) {
+    // visit first instruction in each successor BB
+    for (unsigned i = 0, N = brI->getNumSuccessors(); i < N; ++i)
+      if (stmtsVisited.find(&brI->getSuccessor(i)->front())
+          == stmtsVisited.end())
+        DFSVisitInstr(&brI->getSuccessor(i)->front(), root, depsOfRoot);
+  } else
    if (Instruction* nextI = I->getNext())
      if (stmtsVisited.find(nextI) == stmtsVisited.end())
        DFSVisitInstr(nextI, root, depsOfRoot);
  std::vector<const PHINode*> phiUsers;
  hash_set<const PHINode*> phisSeen; // ensures we don't visit a phi twice
+  // Gather direct (non-Phi) users of CI as dependences; Phi users are
+  // worklisted so their transitive non-Phi users can be added below.
  for (Value::use_iterator UI=CI.use_begin(), UE=CI.use_end(); UI != UE; ++UI)
-    if (const PHINode* phiUser = dyn_cast<PHINode>(*UI))
-      {
-        if (phisSeen.find(phiUser) == phisSeen.end())
-          {
-            phiUsers.push_back(phiUser);
-            phisSeen.insert(phiUser);
-          }
+    if (const PHINode* phiUser = dyn_cast<PHINode>(*UI)) {
+      if (phisSeen.find(phiUser) == phisSeen.end()) {
+        phiUsers.push_back(phiUser);
+        phisSeen.insert(phiUser);
      }
+    }
    else
      depsOfRoot.insert(cast<Instruction>(*UI));
  // Now we've found the non-Phi users and immediate phi users.
  // Recursively walk the phi users and add their non-phi users.
-  for (const PHINode* phiUser; !phiUsers.empty(); phiUsers.pop_back())
-    {
-      phiUser = phiUsers.back();
-      for (Value::use_const_iterator UI=phiUser->use_begin(),
-             UE=phiUser->use_end(); UI != UE; ++UI)
-        if (const PHINode* pn = dyn_cast<PHINode>(*UI))
-          {
-            if (phisSeen.find(pn) == phisSeen.end())
-              {
-                phiUsers.push_back(pn);
-                phisSeen.insert(pn);
-              }
-          }
-        else
-          depsOfRoot.insert(cast<Instruction>(*UI));
-    }
+  for (const PHINode* phiUser; !phiUsers.empty(); phiUsers.pop_back()) {
+    phiUser = phiUsers.back();
+    for (Value::use_const_iterator UI=phiUser->use_begin(),
+           UE=phiUser->use_end(); UI != UE; ++UI)
+      if (const PHINode* pn = dyn_cast<PHINode>(*UI)) {
+        if (phisSeen.find(pn) == phisSeen.end()) {
+          phiUsers.push_back(pn);
+          phisSeen.insert(pn);
+        }
+      } else
+        depsOfRoot.insert(cast<Instruction>(*UI));
+  }
  // Walk paths of the CFG starting at the call instruction and insert
  // one sync before the first dependence on each path, if any.
-  if (! depsOfRoot.empty())
-    {
-      stmtsVisited.clear(); // start a new DFS for this CallInst
-      assert(CI.getNext() && "Call instruction cannot be a terminator!");
-      DFSVisitInstr(CI.getNext(), &CI, depsOfRoot);
-    }
+  if (! depsOfRoot.empty()) {
+    stmtsVisited.clear(); // start a new DFS for this CallInst
+    assert(CI.getNext() && "Call instruction cannot be a terminator!");
+    DFSVisitInstr(CI.getNext(), &CI, depsOfRoot);
+  }
  // Now, eliminate all users of the SSA value of the CallInst, i.e.,
  // if the call instruction returns a value, delete the return value
  // Now we've found all CallInsts reachable from each CallInst.
  // Find those CallInsts that are parallel with at least one other CallInst
  // by counting total inEdges and outEdges.
-  //
  unsigned long totalNumCalls = completed.size();
-  if (totalNumCalls == 1)
-    { // Check first for the special case of a single call instruction not
-      // in any loop. It is not parallel, even if it has no dependences
-      // (this is why it is a special case).
-      //
-      // FIXME:
-      // THIS CASE IS NOT HANDLED RIGHT NOW, I.E., THERE IS NO
-      // PARALLELISM FOR CALLS IN DIFFERENT ITERATIONS OF A LOOP.
-      //
-      return;
-    }
+  if (totalNumCalls == 1) {
+    // Check first for the special case of a single call instruction not
+    // in any loop. It is not parallel, even if it has no dependences
+    // (this is why it is a special case).
+    //
+    // FIXME:
+    // THIS CASE IS NOT HANDLED RIGHT NOW, I.E., THERE IS NO
+    // PARALLELISM FOR CALLS IN DIFFERENT ITERATIONS OF A LOOP.
+    return;
+  }
  hash_map<CallInst*, unsigned long> numDeps;
  for (hash_map<CallInst*, DependentsSet>::iterator II = dependents.begin(),
-         IE = dependents.end(); II != IE; ++II)
-    {
-      CallInst* fromCI = II->first;
-      numDeps[fromCI] += II->second.size();
-      for (Dependents_iterator DI = II->second.begin(), DE = II->second.end();
-           DI != DE; ++DI)
-        numDeps[*DI]++; // *DI can be reached from II->first
-    }
+         IE = dependents.end(); II != IE; ++II) {
+    CallInst* fromCI = II->first;
+    numDeps[fromCI] += II->second.size();
+    for (Dependents_iterator DI = II->second.begin(), DE = II->second.end();
+         DI != DE; ++DI)
+      numDeps[*DI]++; // *DI can be reached from II->first
+  }
  for (hash_map<CallInst*, DependentsSet>::iterator
         II = dependents.begin(), IE = dependents.end(); II != IE; ++II)
+  // NOTE(review): the diff appears elided here — the code from this point
+  // (using I, root, depsOfRoot) belongs to FindParallelCalls::VisitOutEdges,
+  // not to the loop above; confirm against the complete file.
  stmtsVisited.insert(I);
  if (CallInst* CI = dyn_cast<CallInst>(I))
-
    // FIXME: Ignoring parallelism in a loop. Here we're actually *ignoring*
    // a self-dependence in order to get the count comparison right above.
    // When we include loop parallelism, self-dependences should be included.
-    //
-    if (CI != root)
-
-      { // CallInst root has a path to CallInst I and any calls reachable from I
-        depsOfRoot.insert(CI);
-        if (completed[CI])
-          { // We have already visited I so we know all nodes it can reach!
-            DependentsSet& depsOfI = dependents[CI];
-            depsOfRoot.insert(depsOfI.begin(), depsOfI.end());
-            return;
-          }
+    if (CI != root) {
+      // CallInst root has a path to CallInst I and any calls reachable from I
+      depsOfRoot.insert(CI);
+      if (completed[CI]) {
+        // We have already visited I so we know all nodes it can reach!
+        DependentsSet& depsOfI = dependents[CI];
+        depsOfRoot.insert(depsOfI.begin(), depsOfI.end());
+        return;
      }
+    }
  // If we reach here, we need to visit all children of I
  for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(*I);
-       ! DI.fini(); ++DI)
-    {
-      Instruction* sink = &DI->getSink()->getInstr();
-      if (stmtsVisited.find(sink) == stmtsVisited.end())
-        VisitOutEdges(sink, root, depsOfRoot);
-    }
+       ! DI.fini(); ++DI) {
+    Instruction* sink = &DI->getSink()->getInstr();
+    if (stmtsVisited.find(sink) == stmtsVisited.end())
+      VisitOutEdges(sink, root, depsOfRoot);
+  }
}
-void FindParallelCalls::visitCallInst(CallInst& CI)
-{
+// Memoize in dependents[&CI] every call reachable from CI through the
+// dependence graph; completed[&CI] prevents recomputation on revisit.
+void FindParallelCalls::visitCallInst(CallInst& CI) {
  if (completed[&CI])
    return;
  stmtsVisited.clear(); // clear flags to do a fresh DFS
  // Visit all children of CI using a recursive walk through dep graph
  DependentsSet& depsOfRoot = dependents[&CI];
  for (PgmDependenceGraph::iterator DI = depGraph.outDepBegin(CI);
-       ! DI.fini(); ++DI)
-    {
-      Instruction* sink = &DI->getSink()->getInstr();
-      if (stmtsVisited.find(sink) == stmtsVisited.end())
-        VisitOutEdges(sink, &CI, depsOfRoot);
-    }
+       ! DI.fini(); ++DI) {
+    Instruction* sink = &DI->getSink()->getInstr();
+    if (stmtsVisited.find(sink) == stmtsVisited.end())
+      VisitOutEdges(sink, &CI, depsOfRoot);
+  }
  completed[&CI] = true;
}
//----------------------------------------------------------------------------
namespace {
-  class Parallelize: public Pass
-  {
+  // Pass driver: finds parallelizable calls per function and Cilkifies the
+  // functions deemed safe (see run() below).
+  class Parallelize: public Pass {
  public:
    /// Driver functions to transform a program
    ///
}
-static Function* FindMain(Module& M)
-{
-  for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
-    if (FI->getName() == std::string("main"))
-      return FI;
-  return NULL;
-}
-
-
-bool Parallelize::run(Module& M)
-{
+bool Parallelize::run(Module& M) {
  hash_set<Function*> parallelFunctions;
  hash_set<Function*> safeParallelFunctions;
  hash_set<const GlobalValue*> indirectlyCalled;
  // If there is no main (i.e., for an incomplete program), we can do nothing.
  // If there is a main, mark main as a parallel function.
-  //
-  Function* mainFunc = FindMain(M);
+  // Module::getMainFunction() supersedes the FindMain() helper deleted above.
+  Function* mainFunc = M.getMainFunction();
  if (!mainFunc)
    return false;
  // (1) Find candidate parallel functions and mark them as Cilk functions
-  //
  for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
-    if (! FI->isExternal())
-      {
-        Function* F = FI;
-        DSGraph& tdg = getAnalysis<TDDataStructures>().getDSGraph(*F);
-
-        // All the hard analysis work gets done here!
-        //
-        FindParallelCalls finder(*F,
-                                getAnalysis<PgmDependenceGraph>().getGraph(*F));
-        /* getAnalysis<MemoryDepAnalysis>().getGraph(*F)); */
-
-        // Now we know which call instructions are useful to parallelize.
-        // Remember those callee functions.
-        //
-        for (std::vector<CallInst*>::iterator
-               CII = finder.parallelCalls.begin(),
-               CIE = finder.parallelCalls.end(); CII != CIE; ++CII)
-          {
-            // Check if this is a direct call...
-            if ((*CII)->getCalledFunction() != NULL)
-              { // direct call: if this is to a non-external function,
-                // mark it as a parallelizable function
-                if (! (*CII)->getCalledFunction()->isExternal())
-                  parallelFunctions.insert((*CII)->getCalledFunction());
-              }
-            else
-              { // Indirect call: mark all potential callees as bad
-                std::vector<GlobalValue*> callees =
-                  tdg.getNodeForValue((*CII)->getCalledValue())
-                  .getNode()->getGlobals();
-                indirectlyCalled.insert(callees.begin(), callees.end());
-              }
-          }
+    if (! FI->isExternal()) {
+      Function* F = FI;
+      DSGraph& tdg = getAnalysis<TDDataStructures>().getDSGraph(*F);
+
+      // All the hard analysis work gets done here!
+      FindParallelCalls finder(*F,
+                              getAnalysis<PgmDependenceGraph>().getGraph(*F));
+      /* getAnalysis<MemoryDepAnalysis>().getGraph(*F)); */
+
+      // Now we know which call instructions are useful to parallelize.
+      // Remember those callee functions.
+      for (std::vector<CallInst*>::iterator
+             CII = finder.parallelCalls.begin(),
+             CIE = finder.parallelCalls.end(); CII != CIE; ++CII) {
+        // Check if this is a direct call...
+        if ((*CII)->getCalledFunction() != NULL) {
+          // direct call: if this is to a non-external function,
+          // mark it as a parallelizable function
+          if (! (*CII)->getCalledFunction()->isExternal())
+            parallelFunctions.insert((*CII)->getCalledFunction());
+        } else {
+          // Indirect call: mark all potential callees as bad
+          std::vector<GlobalValue*> callees =
+            tdg.getNodeForValue((*CII)->getCalledValue())
+            .getNode()->getGlobals();
+          indirectlyCalled.insert(callees.begin(), callees.end());
+        }
+      }
    }
+  }
  // Remove all indirectly called functions from the list of Cilk functions.
-  //
  for (hash_set<Function*>::iterator PFI = parallelFunctions.begin(),
         PFE = parallelFunctions.end(); PFI != PFE; ++PFI)
    if (indirectlyCalled.count(*PFI) == 0)
  // This should identify both functions and calls to such functions
  // to the code generator.
  // (4) Also, insert calls to sync at appropriate points.
-  //
  Cilkifier cilkifier(M);
  for (hash_set<Function*>::iterator CFI = safeParallelFunctions.begin(),
-         CFE = safeParallelFunctions.end(); CFI != CFE; ++CFI)
-    {
-      cilkifier.TransformFunc(*CFI, safeParallelFunctions,
-                              getAnalysis<PgmDependenceGraph>().getGraph(**CFI));
-      /* getAnalysis<MemoryDepAnalysis>().getGraph(**CFI)); */
-    }
+         CFE = safeParallelFunctions.end(); CFI != CFE; ++CFI) {
+    cilkifier.TransformFunc(*CFI, safeParallelFunctions,
+                            getAnalysis<PgmDependenceGraph>().getGraph(**CFI));
+    /* getAnalysis<MemoryDepAnalysis>().getGraph(**CFI)); */
+  }
  return true;
}