From 9f838225658a5c900b5199db36779c56d0adbc11 Mon Sep 17 00:00:00 2001 From: Tanya Lattner Date: Fri, 22 Apr 2005 06:32:48 +0000 Subject: [PATCH] Updated dependence analyzer. Fixed numerous bugs. Same stage scheduling, etc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@21444 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../ModuloScheduling/DependenceAnalyzer.cpp | 331 ++++++++++++++---- .../ModuloScheduling/DependenceAnalyzer.h | 23 +- .../SparcV9/ModuloScheduling/MSchedGraph.cpp | 323 ++++++++++------- .../SparcV9/ModuloScheduling/MSchedGraph.h | 4 + .../ModuloScheduling/ModuloScheduling.cpp | 241 ++++++++----- .../ModuloScheduling/ModuloScheduling.h | 16 +- 6 files changed, 641 insertions(+), 297 deletions(-) diff --git a/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.cpp b/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.cpp index b99ecdfe495..0fd7c604eda 100644 --- a/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.cpp +++ b/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.cpp @@ -1,4 +1,4 @@ -//===-- DependenceAnalyzer.cpp - DependenceAnalyzer ----------------*- C++ -*-===// +//===-- DependenceAnalyzer.cpp - DependenceAnalyzer ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,94 +16,281 @@ #include "DependenceAnalyzer.h" #include "llvm/Type.h" #include "llvm/Support/Debug.h" -using namespace llvm; +#include "llvm/ADT/Statistic.h" +#include "llvm/Constants.h" +using namespace llvm; -/// Create ModuloSchedulingPass -/// namespace llvm { -FunctionPass *createDependenceAnalyzer() { - return new DependenceAnalyzer(); + + /// Create ModuloSchedulingPass + FunctionPass *createDependenceAnalyzer() { + return new DependenceAnalyzer(); + } } + +Statistic<> NoDeps("depanalyzer-nodeps", "Number of dependences eliminated"); +Statistic<> NumDeps("depanalyzer-deps", + "Number of dependences could not eliminate"); +Statistic<> AdvDeps("depanalyzer-advdeps", + "Number of dependences using advanced techniques"); + +bool DependenceAnalyzer::runOnFunction(Function &F) { + AA = &getAnalysis(); + TD = &getAnalysis(); + SE = &getAnalysis(); + + return false; } - bool DependenceAnalyzer::runOnFunction(Function &F) { - AA = &getAnalysis(); - TD = &getAnalysis(); +static RegisterAnalysisX("depanalyzer", + "Dependence Analyzer"); + +// - Get inter and intra dependences between loads and stores +// +// Overview of Method: +// Step 1: Use alias analysis to determine dependencies if values are loop +// invariant +// Step 2: If pointers are not GEP, then there is a dependence. +// Step 3: Compare GEP base pointers with AA. If no alias, no dependence. +// If may alias, then add a dependence. If must alias, then analyze +// further (Step 4) +// Step 4: do advanced analysis +void DependenceAnalyzer::AnalyzeDeps(Value *val, Value *val2, bool valLoad, + bool val2Load, + std::vector &deps, + BasicBlock *BB, + bool srcBeforeDest) { + + bool loopInvariant = true; - return false; + //Check if both are instructions and prove not loop invariant if possible + if(Instruction *valInst = dyn_cast(val)) + if(valInst->getParent() == BB) + loopInvariant = false; + if(Instruction *val2Inst = dyn_cast(val2)) + if(val2Inst->getParent() == BB) + loopInvariant = false; + + + //If Loop invariant, let AA decide + if(loopInvariant) { + if(AA->alias(val, (unsigned)TD->getTypeSize(val->getType()), + val2,(unsigned)TD->getTypeSize(val2->getType())) + != AliasAnalysis::NoAlias) { + createDep(deps, valLoad, val2Load, srcBeforeDest); + } + else + ++NoDeps; + return; } + + //Otherwise, continue with step 2 - static RegisterAnalysisX("depanalyzer", "Dependence Analyzer"); + GetElementPtrInst *GP = dyn_cast(val); + GetElementPtrInst *GP2 = dyn_cast(val2); - DependenceResult DependenceAnalyzer::getDependenceInfo(Instruction *inst1, Instruction *inst2) { - std::vector deps; + //If both are not GP instructions, we can not do further analysis + if(!GP || !GP2) { + createDep(deps, valLoad, val2Load, srcBeforeDest); + return; + } - DEBUG(std::cerr << "Inst1: " << *inst1 << "\n"); - DEBUG(std::cerr << "Inst2: " << *inst2 << "\n"); + //Otherwise, compare GEP bases (op #0) with Alias Analysis - if(LoadInst *ldInst = dyn_cast(inst1)) { + Value *GPop = GP->getOperand(0); + Value *GP2op = GP2->getOperand(0); + int alias = AA->alias(GPop, (unsigned)TD->getTypeSize(GPop->getType()), + GP2op,(unsigned)TD->getTypeSize(GP2op->getType())); - if(StoreInst *stInst = dyn_cast(inst2)) { - //Get load mem ref - Value *ldOp = ldInst->getOperand(0); - - //Get store mem ref - Value *stOp = stInst->getOperand(1); - - if(AA->alias(ldOp, (unsigned)TD->getTypeSize(ldOp->getType()), - stOp,(unsigned)TD->getTypeSize(stOp->getType())) - != AliasAnalysis::NoAlias) { - - //Anti Dep - deps.push_back(Dependence(0, Dependence::AntiDep)); - } - } - } - else if(StoreInst *stInst = dyn_cast(inst1)) { - - if(LoadInst *ldInst = dyn_cast(inst2)) { - //Get load mem ref - Value *ldOp = ldInst->getOperand(0); - - //Get store mem ref - Value *stOp = stInst->getOperand(1); - - - if(AA->alias(ldOp, (unsigned)TD->getTypeSize(ldOp->getType()), - stOp,(unsigned)TD->getTypeSize(stOp->getType())) - != AliasAnalysis::NoAlias) { - - //Anti Dep - deps.push_back(Dependence(0, Dependence::TrueDep)); - } - } - else if(StoreInst *stInst2 = dyn_cast(inst2)) { - - //Get load mem ref - Value *stOp1 = stInst->getOperand(1); - - //Get store mem ref - Value *stOp2 = stInst2->getOperand(1); - - - if(AA->alias(stOp1, (unsigned)TD->getTypeSize(stOp1->getType()), - stOp2,(unsigned)TD->getTypeSize(stOp2->getType())) - != AliasAnalysis::NoAlias) { - - //Anti Dep - deps.push_back(Dependence(0, Dependence::OutputDep)); - } - } + if(alias == AliasAnalysis::MustAlias) { + //Further dep analysis to do + advancedDepAnalysis(GP, GP2, valLoad, val2Load, deps, srcBeforeDest); + ++AdvDeps; + } + else if(alias == AliasAnalysis::MayAlias) { + createDep(deps, valLoad, val2Load, srcBeforeDest); + } + //Otherwise no dependence since there is no alias + else + ++NoDeps; +} + +// advancedDepAnalysis - Do advanced data dependence tests +void DependenceAnalyzer::advancedDepAnalysis(GetElementPtrInst *gp1, + GetElementPtrInst *gp2, + bool valLoad, + bool val2Load, + std::vector &deps, + bool srcBeforeDest) { - } - else - assert("Expected a load or a store\n"); + //Check if both GEPs are in a simple form: 3 ops, constant 0 as second arg + if(gp1->getNumOperands() != 3 || gp2->getNumOperands() != 3) { + createDep(deps, valLoad, val2Load, srcBeforeDest); + return; + } + + //Check second arg is constant 0 + bool GPok = false; + if(Constant *c1 = dyn_cast(gp1->getOperand(1))) + if(Constant *c2 = dyn_cast(gp2->getOperand(1))) + if(c1->isNullValue() && c2->isNullValue()) + GPok = true; + + if(!GPok) { + createDep(deps, valLoad, val2Load, srcBeforeDest); + return; + + } + + Value *Gep1Idx = gp1->getOperand(2); + Value *Gep2Idx = gp2->getOperand(2); + + if(CastInst *c1 = dyn_cast(Gep1Idx)) + Gep1Idx = c1->getOperand(0); + if(CastInst *c2 = dyn_cast(Gep2Idx)) + Gep2Idx = c2->getOperand(0); + + //Get SCEV for each index into the area + SCEVHandle SV1 = SE->getSCEV(Gep1Idx); + SCEVHandle SV2 = SE->getSCEV(Gep2Idx); + + //Now handle special cases of dependence analysis + SV1->print(std::cerr); + std::cerr << "\n"; + SV2->print(std::cerr); + std::cerr << "\n"; + + //Check if we have an SCEVAddExpr, cause we can only handle those + SCEVAddRecExpr *SVAdd1 = dyn_cast(SV1); + SCEVAddRecExpr *SVAdd2 = dyn_cast(SV2); + + //Default to having a dependence since we can't analyze further + if(!SVAdd1 || !SVAdd2) { + createDep(deps, valLoad, val2Load, srcBeforeDest); + return; + } - DependenceResult dr = DependenceResult(deps); - return dr; + //Check if not Affine, we can't handle those + if(!SVAdd1->isAffine( ) || !SVAdd2->isAffine()) { + createDep(deps, valLoad, val2Load, srcBeforeDest); + return; } + //We know the SCEV is in the form A + B*x, check that B is the same for both + SCEVConstant *B1 = dyn_cast(SVAdd1->getOperand(1)); + SCEVConstant *B2 = dyn_cast(SVAdd2->getOperand(1)); + + if(B1->getValue() != B2->getValue()) { + createDep(deps, valLoad, val2Load, srcBeforeDest); + return; + } + + if(B1->getValue()->getRawValue() != 1 || B2->getValue()->getRawValue() != 1) { + createDep(deps, valLoad, val2Load, srcBeforeDest); + return; + } + + + SCEVConstant *A1 = dyn_cast(SVAdd1->getOperand(0)); + SCEVConstant *A2 = dyn_cast(SVAdd2->getOperand(0)); + + //Come back and deal with nested SCEV! + if(!A1 || !A2) { + createDep(deps, valLoad, val2Load, srcBeforeDest); + return; + } + + //If equal, create dep as normal + if(A1->getValue() == A2->getValue()) { + createDep(deps, valLoad, val2Load, srcBeforeDest); + return; + } + //Eliminate a dep if this is a intra dep + else if(srcBeforeDest) { + ++NoDeps; + return; + } + + //Find constant index difference + int diff = A1->getValue()->getRawValue() - A2->getValue()->getRawValue(); + std::cerr << diff << "\n"; + + if(diff > 0) + createDep(deps, valLoad, val2Load, srcBeforeDest, diff); + + //assert(diff > 0 && "Expected diff to be greater then 0"); +} + +// Create dependences once its determined these two instructions +// references the same memory +void DependenceAnalyzer::createDep(std::vector &deps, + bool valLoad, bool val2Load, + bool srcBeforeDest, int diff) { + + //If the source instruction occurs after the destination instruction + //(execution order), then this dependence is across iterations + if(!srcBeforeDest && (diff==0)) + diff = 1; + + //If load/store pair + if(valLoad && !val2Load) { + //Anti Dep + deps.push_back(Dependence(diff, Dependence::AntiDep)); + ++NumDeps; + } + //If store/load pair + else if(!valLoad && val2Load) { + //True Dep + deps.push_back(Dependence(diff, Dependence::TrueDep)); + ++NumDeps; + } + //If store/store pair + else if(!valLoad && !val2Load) { + //True Dep + deps.push_back(Dependence(diff, Dependence::OutputDep)); + ++NumDeps; + } +} + + + +//Get Dependence Info for a pair of Instructions +DependenceResult DependenceAnalyzer::getDependenceInfo(Instruction *inst1, + Instruction *inst2, + bool srcBeforeDest) { + std::vector deps; + + DEBUG(std::cerr << "Inst1: " << *inst1 << "\n"); + DEBUG(std::cerr << "Inst2: " << *inst2 << "\n"); + + //No self deps + if(inst1 == inst2) + return DependenceResult(deps); + + if(LoadInst *ldInst = dyn_cast(inst1)) { + + if(StoreInst *stInst = dyn_cast(inst2)) + AnalyzeDeps(ldInst->getOperand(0), stInst->getOperand(1), + true, false, deps, ldInst->getParent(), srcBeforeDest); + } + else if(StoreInst *stInst = dyn_cast(inst1)) { + + if(LoadInst *ldInst = dyn_cast(inst2)) + AnalyzeDeps(stInst->getOperand(1), ldInst->getOperand(0), false, true, + deps, ldInst->getParent(), srcBeforeDest); + + else if(StoreInst *stInst2 = dyn_cast(inst2)) + AnalyzeDeps(stInst->getOperand(1), stInst2->getOperand(1), false, false, + deps, stInst->getParent(), srcBeforeDest); + } + else + assert(0 && "Expected a load or a store\n"); + + DependenceResult dr = DependenceResult(deps); + return dr; +} + diff --git a/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.h b/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.h index 6223fb74331..2440ea9d390 100644 --- a/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.h +++ b/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.h @@ -17,10 +17,12 @@ #include "llvm/Function.h" #include "llvm/Pass.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Target/TargetData.h" #include namespace llvm { + //class to represent a dependence struct Dependence { @@ -47,11 +49,25 @@ namespace llvm { class DependenceAnalyzer : public FunctionPass { + + AliasAnalysis *AA; TargetData *TD; + ScalarEvolution *SE; + + void advancedDepAnalysis(GetElementPtrInst *gp1, GetElementPtrInst *gp2, + bool valLoad, bool val2Load, + std::vector &deps, bool srcBeforeDest); + + void AnalyzeDeps(Value *val, Value *val2, bool val1Load, bool val2Load, + std::vector &deps, BasicBlock *BB, + bool srcBeforeDest); + + void createDep(std::vector &deps, bool valLoad, bool val2Load, + bool srcBeforeDest, int diff = 0); public: - DependenceAnalyzer() { AA = 0; TD = 0; } + DependenceAnalyzer() { AA = 0; TD = 0; SE = 0; } virtual bool runOnFunction(Function &F); virtual const char* getPassName() const { return "DependenceAnalyzer"; } @@ -59,10 +75,13 @@ namespace llvm { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); } //get dependence info - DependenceResult getDependenceInfo(Instruction *inst1, Instruction *inst2); + DependenceResult getDependenceInfo(Instruction *inst1, Instruction *inst2, + bool srcBeforeDest); }; diff --git a/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp b/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp index 6cd6d94ae1d..7160f8dc956 100644 --- a/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp +++ b/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp @@ -19,6 +19,7 @@ #include "../SparcV9RegisterInfo.h" #include "../MachineCodeForInstruction.h" #include "llvm/BasicBlock.h" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Type.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -33,8 +34,9 @@ using namespace llvm; //MSchedGraphNode constructor MSchedGraphNode::MSchedGraphNode(const MachineInstr* inst, MSchedGraph *graph, unsigned idx, - unsigned late, bool isBranch) - : Inst(inst), Parent(graph), index(idx), latency(late), isBranchInstr(isBranch) { + unsigned late, bool isBranch) + : Inst(inst), Parent(graph), index(idx), latency(late), + isBranchInstr(isBranch) { //Add to the graph graph->addNode(inst, this); @@ -73,7 +75,8 @@ MSchedGraphEdge MSchedGraphNode::getInEdge(MSchedGraphNode *pred) { //Get the iteration difference for the edge from this node to its successor unsigned MSchedGraphNode::getIteDiff(MSchedGraphNode *succ) { - for(std::vector::iterator I = Successors.begin(), E = Successors.end(); + for(std::vector::iterator I = Successors.begin(), + E = Successors.end(); I != E; ++I) { if(I->getDest() == succ) return I->getIteDiff(); @@ -86,7 +89,8 @@ unsigned MSchedGraphNode::getInEdgeNum(MSchedGraphNode *pred) { //Loop over all the successors of our predecessor //return the edge the corresponds to this in edge int count = 0; - for(MSchedGraphNode::succ_iterator I = pred->succ_begin(), E = pred->succ_end(); + for(MSchedGraphNode::succ_iterator I = pred->succ_begin(), + E = pred->succ_end(); I != E; ++I) { if(*I == this) return count; @@ -106,7 +110,8 @@ bool MSchedGraphNode::isSuccessor(MSchedGraphNode *succ) { //Dtermine if pred is a predecessor of this node bool MSchedGraphNode::isPredecessor(MSchedGraphNode *pred) { - if(std::find( Predecessors.begin(), Predecessors.end(), pred) != Predecessors.end()) + if(std::find( Predecessors.begin(), Predecessors.end(), + pred) != Predecessors.end()) return true; else return false; @@ -138,13 +143,16 @@ void MSchedGraph::deleteNode(MSchedGraphNode *node) { } -//Create a graph for a machine block. The ignoreInstrs map is so that we ignore instructions -//associated to the index variable since this is a special case in Modulo Scheduling. -//We only want to deal with the body of the loop. -MSchedGraph::MSchedGraph(const MachineBasicBlock *bb, const TargetMachine &targ, - std::map &ignoreInstrs, - DependenceAnalyzer &DA, std::map &machineTollvm - ) + +//Create a graph for a machine block. The ignoreInstrs map is so that +//we ignore instructions associated to the index variable since this +//is a special case in Modulo Scheduling. We only want to deal with +//the body of the loop. +MSchedGraph::MSchedGraph(const MachineBasicBlock *bb, + const TargetMachine &targ, + std::map &ignoreInstrs, + DependenceAnalyzer &DA, + std::map &machineTollvm) : BB(bb), Target(targ) { //Make sure BB is not null, @@ -160,13 +168,15 @@ MSchedGraph::MSchedGraph(const MachineBasicBlock *bb, const TargetMachine &targ, } //Copies the graph and keeps a map from old to new nodes -MSchedGraph::MSchedGraph(const MSchedGraph &G, std::map &newNodes) +MSchedGraph::MSchedGraph(const MSchedGraph &G, + std::map &newNodes) : BB(G.BB), Target(G.Target) { std::map oldToNew; //Copy all nodes - for(MSchedGraph::const_iterator N = G.GraphMap.begin(), NE = G.GraphMap.end(); - N != NE; ++N) { + for(MSchedGraph::const_iterator N = G.GraphMap.begin(), + NE = G.GraphMap.end(); N != NE; ++N) { + MSchedGraphNode *newNode = new MSchedGraphNode(*(N->second)); oldToNew[&*(N->second)] = newNode; newNodes[newNode] = &*(N->second); @@ -174,7 +184,8 @@ MSchedGraph::MSchedGraph(const MSchedGraph &G, std::mapsecond); @@ -196,15 +207,49 @@ MSchedGraph::MSchedGraph(const MSchedGraph &G, std::mapsecond; } +//Print out graph +void MSchedGraph::print(std::ostream &os) const { + for(MSchedGraph::const_iterator N = GraphMap.begin(), NE = GraphMap.end(); + N != NE; ++N) { + + //Get the node we are dealing with + MSchedGraphNode *node = &*(N->second); + os << "Node Start\n"; + node->print(os); + os << "Successors:\n"; + //print successors + for(unsigned i = 0; i < node->succ_size(); ++i) { + MSchedGraphEdge *edge = node->getSuccessor(i); + MSchedGraphNode *oldDest = edge->getDest(); + oldDest->print(os); + } + os << "Node End\n"; + } +} + +//Calculate total delay +int MSchedGraph::totalDelay() { + int sum = 0; + + for(MSchedGraph::const_iterator N = GraphMap.begin(), NE = GraphMap.end(); + N != NE; ++N) { + + //Get the node we are dealing with + MSchedGraphNode *node = &*(N->second); + sum += node->getLatency(); + } + return sum; +} //Experimental code to add edges from the branch to all nodes dependent upon it. -void hasPath(MSchedGraphNode *node, std::set &visited, - std::set &branches, MSchedGraphNode *startNode, - std::set > &newEdges ) { +void hasPath(MSchedGraphNode *node, std::set &visited, + std::set &branches, MSchedGraphNode *startNode, + std::set > &newEdges ) { visited.insert(node); DEBUG(std::cerr << "Visiting: " << *node << "\n"); @@ -229,7 +274,8 @@ void MSchedGraph::addBranchEdges() { std::set branches; std::set nodes; - for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end(); I != E; ++I) { + for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end(); + I != E; ++I) { if(I->second->isBranch()) if(I->second->hasPredecessors()) branches.insert(I->second); @@ -238,7 +284,8 @@ void MSchedGraph::addBranchEdges() { //See if there is a path first instruction to the branches, if so, add an //iteration dependence between that node and the branch std::set > newEdges; - for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end(); I != E; ++I) { + for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end(); + I != E; ++I) { std::set visited; hasPath((I->second), visited, branches, (I->second), newEdges); } @@ -275,7 +322,8 @@ void MSchedGraph::addBranchEdges() { //Add edges between the nodes void MSchedGraph::buildNodesAndEdges(std::map &ignoreInstrs, DependenceAnalyzer &DA, - std::map &machineTollvm) { + std::map &machineTollvm) { + //Get Machine target information for calculating latency const TargetInstrInfo *MTI = Target.getInstrInfo(); @@ -289,7 +337,8 @@ void MSchedGraph::buildNodesAndEdges(std::map &ig unsigned index = 0; //Loop over instructions in MBB and add nodes and edges - for (MachineBasicBlock::const_iterator MI = BB->begin(), e = BB->end(); MI != e; ++MI) { + for (MachineBasicBlock::const_iterator MI = BB->begin(), e = BB->end(); + MI != e; ++MI) { //Ignore indvar instructions if(ignoreInstrs.count(MI)) { @@ -329,11 +378,13 @@ void MSchedGraph::buildNodesAndEdges(std::map &ig isBranch = true; //Node is created and added to the graph automatically - MSchedGraphNode *node = new MSchedGraphNode(MI, this, index, delay, isBranch); + MSchedGraphNode *node = new MSchedGraphNode(MI, this, index, delay, + isBranch); DEBUG(std::cerr << "Created Node: " << *node << "\n"); - //Check OpCode to keep track of memory operations to add memory dependencies later. + //Check OpCode to keep track of memory operations to add memory + //dependencies later. if(MTI->isLoad(opCode) || MTI->isStore(opCode)) memInstructions.push_back(node); @@ -359,7 +410,8 @@ void MSchedGraph::buildNodesAndEdges(std::map &ig //Add virtual registers dependencies //Check if any exist in the value map already and create dependencies //between them. - if(mOp.getType() == MachineOperand::MO_VirtualRegister || mOp.getType() == MachineOperand::MO_CCRegister) { + if(mOp.getType() == MachineOperand::MO_VirtualRegister + || mOp.getType() == MachineOperand::MO_CCRegister) { //Make sure virtual register value is not null assert((mOp.getVRegValue() != NULL) && "Null value is defined"); @@ -395,9 +447,11 @@ void MSchedGraph::buildNodesAndEdges(std::map &ig ++index; } - //Loop over LLVM BB, examine phi instructions, and add them to our phiInstr list to process + //Loop over LLVM BB, examine phi instructions, and add them to our + //phiInstr list to process const BasicBlock *llvm_bb = BB->getBasicBlock(); - for(BasicBlock::const_iterator I = llvm_bb->begin(), E = llvm_bb->end(); I != E; ++I) { + for(BasicBlock::const_iterator I = llvm_bb->begin(), E = llvm_bb->end(); + I != E; ++I) { if(const PHINode *PN = dyn_cast(I)) { MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(PN); for (unsigned j = 0; j < tempMvec.size(); j++) { @@ -414,7 +468,8 @@ void MSchedGraph::buildNodesAndEdges(std::map &ig addMachRegEdges(regNumtoNodeMap); //Finally deal with PHI Nodes and Value* - for(std::vector::iterator I = phiInstrs.begin(), E = phiInstrs.end(); I != E; ++I) { + for(std::vector::iterator I = phiInstrs.begin(), + E = phiInstrs.end(); I != E; ++I) { //Get Node for this instruction std::map::iterator X; @@ -431,7 +486,8 @@ void MSchedGraph::buildNodesAndEdges(std::map &ig for(unsigned i=0; i < (*I)->getNumOperands(); ++i) { //Get Operand const MachineOperand &mOp = (*I)->getOperand(i); - if((mOp.getType() == MachineOperand::MO_VirtualRegister || mOp.getType() == MachineOperand::MO_CCRegister) && mOp.isUse()) { + if((mOp.getType() == MachineOperand::MO_VirtualRegister + || mOp.getType() == MachineOperand::MO_CCRegister) && mOp.isUse()) { //find the value in the map if (const Value* srcI = mOp.getVRegValue()) { @@ -444,7 +500,8 @@ void MSchedGraph::buildNodesAndEdges(std::map &ig //those instructions //to this one we are processing if(V != valuetoNodeMap.end()) { - addValueEdges(V->second, node, mOp.isUse(), mOp.isDef(), phiInstrs, 1); + addValueEdges(V->second, node, mOp.isUse(), mOp.isDef(), + phiInstrs, 1); } } } @@ -496,7 +553,8 @@ void MSchedGraph::addMachRegEdges(std::map >& //Loop over all machine registers in the map, and add dependencies //between the instructions that use it typedef std::map > regNodeMap; - for(regNodeMap::iterator I = regNumtoNodeMap.begin(); I != regNumtoNodeMap.end(); ++I) { + for(regNodeMap::iterator I = regNumtoNodeMap.begin(); + I != regNumtoNodeMap.end(); ++I) { //Get the register number int regNum = (*I).first; @@ -527,24 +585,29 @@ void MSchedGraph::addMachRegEdges(std::map >& if(Nodes[j].second->getInst()->getOperand(Nodes[j].first).isDef()) { //Src only uses the register (read) if(srcIsUse) - srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister, + srcNode->addOutEdge(Nodes[j].second, + MSchedGraphEdge::MachineRegister, MSchedGraphEdge::AntiDep); else if(srcIsUseandDef) { - srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister, + srcNode->addOutEdge(Nodes[j].second, + MSchedGraphEdge::MachineRegister, MSchedGraphEdge::AntiDep); - - srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister, + + srcNode->addOutEdge(Nodes[j].second, + MSchedGraphEdge::MachineRegister, MSchedGraphEdge::OutputDep); } else - srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister, + srcNode->addOutEdge(Nodes[j].second, + MSchedGraphEdge::MachineRegister, MSchedGraphEdge::OutputDep); } //Dest node is a read else { if(!srcIsUse || srcIsUseandDef) - srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister, + srcNode->addOutEdge(Nodes[j].second, + MSchedGraphEdge::MachineRegister, MSchedGraphEdge::TrueDep); } @@ -557,25 +620,29 @@ void MSchedGraph::addMachRegEdges(std::map >& if(Nodes[j].second->getInst()->getOperand(Nodes[j].first).isDef()) { //Src only uses the register (read) if(srcIsUse) - srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister, - MSchedGraphEdge::AntiDep, 1); - + srcNode->addOutEdge(Nodes[j].second, + MSchedGraphEdge::MachineRegister, + MSchedGraphEdge::AntiDep, 1); else if(srcIsUseandDef) { - srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister, - MSchedGraphEdge::AntiDep, 1); - - srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister, - MSchedGraphEdge::OutputDep, 1); + srcNode->addOutEdge(Nodes[j].second, + MSchedGraphEdge::MachineRegister, + MSchedGraphEdge::AntiDep, 1); + + srcNode->addOutEdge(Nodes[j].second, + MSchedGraphEdge::MachineRegister, + MSchedGraphEdge::OutputDep, 1); } else - srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister, - MSchedGraphEdge::OutputDep, 1); + srcNode->addOutEdge(Nodes[j].second, + MSchedGraphEdge::MachineRegister, + MSchedGraphEdge::OutputDep, 1); } //Dest node is a read else { if(!srcIsUse || srcIsUseandDef) - srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister, - MSchedGraphEdge::TrueDep,1 ); + srcNode->addOutEdge(Nodes[j].second, + MSchedGraphEdge::MachineRegister, + MSchedGraphEdge::TrueDep,1 ); } @@ -589,8 +656,9 @@ void MSchedGraph::addMachRegEdges(std::map >& //Add edges between all loads and stores //Can be less strict with alias analysis and data dependence analysis. -void MSchedGraph::addMemEdges(const std::vector& memInst, DependenceAnalyzer &DA, - std::map &machineTollvm) { +void MSchedGraph::addMemEdges(const std::vector& memInst, + DependenceAnalyzer &DA, + std::map &machineTollvm) { //Get Target machine instruction info const TargetInstrInfo *TMI = Target.getInstrInfo(); @@ -603,68 +671,73 @@ void MSchedGraph::addMemEdges(const std::vector& memInst, Depe //Get the machine opCode to determine type of memory instruction MachineOpCode srcNodeOpCode = srcInst->getOpcode(); + + //All instructions after this one in execution order have an + //iteration delay of 0 + for(unsigned destIndex = 0; destIndex < memInst.size(); ++destIndex) { - //All instructions after this one in execution order have an iteration delay of 0 - for(unsigned destIndex = srcIndex + 1; destIndex < memInst.size(); ++destIndex) { + //No self loops + if(destIndex == srcIndex) + continue; MachineInstr *destInst = (MachineInstr*) memInst[destIndex]->getInst(); DEBUG(std::cerr << "MInst1: " << *srcInst << "\n"); - DEBUG(std::cerr << "Inst1: " << *machineTollvm[srcInst] << "\n"); DEBUG(std::cerr << "MInst2: " << *destInst << "\n"); - DEBUG(std::cerr << "Inst2: " << *machineTollvm[destInst] << "\n"); - - DependenceResult dr = DA.getDependenceInfo(machineTollvm[srcInst], machineTollvm[destInst]); - - for(std::vector::iterator d = dr.dependences.begin(), de = dr.dependences.end(); - d != de; ++d) { - //Add edge from load to store - memInst[srcIndex]->addOutEdge(memInst[destIndex], - MSchedGraphEdge::MemoryDep, - d->getDepType(), d->getIteDiff()); - + + //Assuming instructions without corresponding llvm instructions + //are from constant pools. + if (!machineTollvm.count(srcInst) || !machineTollvm.count(destInst)) + continue; + + bool useDepAnalyzer = true; + + //Some machine loads and stores are generated by casts, so be + //conservative and always add deps + Instruction *srcLLVM = machineTollvm[srcInst]; + Instruction *destLLVM = machineTollvm[destInst]; + if(!isa(srcLLVM) + && !isa(srcLLVM)) { + if(isa(srcLLVM)) { + if(isa(srcLLVM->getOperand(0)) || isa(srcLLVM->getOperand(1))) + continue; + } + useDepAnalyzer = false; + } + if(!isa(destLLVM) + && !isa(destLLVM)) { + if(isa(destLLVM)) { + if(isa(destLLVM->getOperand(0)) || isa(destLLVM->getOperand(1))) + continue; + } + useDepAnalyzer = false; } - } - - //All instructions before the src in execution order have an iteration delay of 1 - for(unsigned destIndex = 0; destIndex < srcIndex; ++destIndex) { - - MachineInstr *destInst = (MachineInstr*) memInst[destIndex]->getInst(); - bool malias = false; - - //source is a Load, so add anti-dependencies (store after load) - if(TMI->isLoad(srcNodeOpCode)) { + //Use dep analysis when we have corresponding llvm loads/stores + if(useDepAnalyzer) { + bool srcBeforeDest = true; + if(destIndex < srcIndex) + srcBeforeDest = false; - //Get the Value* that we are reading from the load, always the first op - const MachineOperand &mOp = srcInst->getOperand(0); - const MachineOperand &mOp2 = destInst->getOperand(0); - - if(mOp.hasAllocatedReg()) - if(mOp.getReg() == SparcV9::g0) - continue; - else - malias = true; - if(mOp2.hasAllocatedReg()) - if(mOp2.getReg() == SparcV9::g0) - continue; - else - malias = true; + DependenceResult dr = DA.getDependenceInfo(machineTollvm[srcInst], + machineTollvm[destInst], + srcBeforeDest); - //Only add the edge if we can't verify that they do not alias - /*if(AA.alias(mOp2.getVRegValue(), - (unsigned)TD.getTypeSize(mOp2.getVRegValue()->getType()), - mOp.getVRegValue(), - (unsigned)TD.getTypeSize(mOp.getVRegValue()->getType())) - != AliasAnalysis::NoAlias) {*/ - if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) - memInst[srcIndex]->addOutEdge(memInst[destIndex], - MSchedGraphEdge::MemoryDep, - MSchedGraphEdge::AntiDep, 1); - //} + for(std::vector::iterator d = dr.dependences.begin(), + de = dr.dependences.end(); d != de; ++d) { + //Add edge from load to store + memInst[srcIndex]->addOutEdge(memInst[destIndex], + MSchedGraphEdge::MemoryDep, + d->getDepType(), d->getIteDiff()); + + } } - if(TMI->isStore(srcNodeOpCode)) { - + //Otherwise, we can not do any further analysis and must make a dependence + else { + + //Get the machine opCode to determine type of memory instruction + MachineOpCode destNodeOpCode = destInst->getOpcode(); + //Get the Value* that we are reading from the load, always the first op const MachineOperand &mOp = srcInst->getOperand(0); const MachineOperand &mOp2 = destInst->getOperand(0); @@ -672,33 +745,31 @@ void MSchedGraph::addMemEdges(const std::vector& memInst, Depe if(mOp.hasAllocatedReg()) if(mOp.getReg() == SparcV9::g0) continue; - else - malias = true; if(mOp2.hasAllocatedReg()) if(mOp2.getReg() == SparcV9::g0) continue; + + DEBUG(std::cerr << "Adding dependence for machine instructions\n"); + //Load-Store deps + if(TMI->isLoad(srcNodeOpCode)) { + + if(TMI->isStore(destNodeOpCode)) + memInst[srcIndex]->addOutEdge(memInst[destIndex], + MSchedGraphEdge::MemoryDep, + MSchedGraphEdge::AntiDep, 0); + } + else if(TMI->isStore(srcNodeOpCode)) { + if(TMI->isStore(destNodeOpCode)) + memInst[srcIndex]->addOutEdge(memInst[destIndex], + MSchedGraphEdge::MemoryDep, + MSchedGraphEdge::OutputDep, 0); + else - malias = true; - - //Only add the edge if we can't verify that they do not alias - /*if(AA.alias(mOp2.getVRegValue(), - (unsigned)TD.getTypeSize(mOp2.getVRegValue()->getType()), - mOp.getVRegValue(), - (unsigned)TD.getTypeSize(mOp.getVRegValue()->getType())) - != AliasAnalysis::NoAlias) {*/ - - if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode())) - memInst[srcIndex]->addOutEdge(memInst[destIndex], - MSchedGraphEdge::MemoryDep, - MSchedGraphEdge::OutputDep, 1); - else - memInst[srcIndex]->addOutEdge(memInst[destIndex], - MSchedGraphEdge::MemoryDep, - MSchedGraphEdge::TrueDep, 1); - //} + memInst[srcIndex]->addOutEdge(memInst[destIndex], + MSchedGraphEdge::MemoryDep, + MSchedGraphEdge::TrueDep, 0); + } } - } - } } diff --git a/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h b/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h index 12d02d0e279..b0b6e79ed51 100644 --- a/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h +++ b/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h @@ -258,6 +258,9 @@ namespace llvm { //Copy constructor with maps to link old nodes to new nodes MSchedGraph(const MSchedGraph &G, std::map &newNodes); + + //Print graph + void print(std::ostream &os) const; //Deconstructor! ~MSchedGraph(); @@ -265,6 +268,7 @@ namespace llvm { //Add or delete nodes from the Graph void addNode(const MachineInstr* MI, MSchedGraphNode *node); void deleteNode(MSchedGraphNode *node); + int totalDelay(); //iterators typedef std::map::iterator iterator; diff --git a/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp b/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp index 4c0e449513f..e5b8d3c53d3 100644 --- a/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp +++ b/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp @@ -74,12 +74,23 @@ static void WriteGraphToFile(std::ostream &O, const std::string &GraphName, //Graph Traits for printing out the dependence graph namespace llvm { + + //Loop statistics Statistic<> ValidLoops("modulosched-validLoops", "Number of candidate loops modulo-scheduled"); - Statistic<> MSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled"); - Statistic<> IncreasedII("modulosched-increasedII", "Number of times we had to increase II"); + Statistic<> JumboBB("modulosched-jumboBB", "Basic Blocks with more then 100 instructions"); + Statistic<> LoopsWithCalls("modulosched-loopCalls", "Loops with calls"); + Statistic<> LoopsWithCondMov("modulosched-loopCondMov", "Loops with conditional moves"); + Statistic<> InvalidLoops("modulosched-invalidLoops", "Loops with unknown trip counts or loop invariant trip counts"); Statistic<> SingleBBLoops("modulosched-singeBBLoops", "Number of single basic block loops"); + + //Scheduling Statistics + Statistic<> MSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled"); Statistic<> NoSched("modulosched-noSched", "No schedule"); Statistic<> SameStage("modulosched-sameStage", "Max stage is 0"); + Statistic<> ResourceConstraint("modulosched-resourceConstraint", "Loops constrained by resources"); + Statistic<> RecurrenceConstraint("modulosched-recurrenceConstraint", "Loops constrained by recurrences"); + Statistic<> FinalIISum("modulosched-finalIISum", "Sum of all final II"); + Statistic<> IISum("modulosched-IISum", "Sum of all theoretical II"); template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { @@ -142,7 +153,7 @@ namespace llvm { /// 3) Scheduling /// bool ModuloSchedulingPass::runOnFunction(Function &F) { - alarm(300); + alarm(100); bool Changed = false; int numMS = 0; @@ -160,9 +171,14 @@ bool ModuloSchedulingPass::runOnFunction(Function &F) { //Iterate over BasicBlocks and put them into our worklist if they are valid for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); ++BI) - if(MachineBBisValid(BI)) { - Worklist.push_back(&*BI); - ++ValidLoops; + if(MachineBBisValid(BI)) { + if(BI->size() < 100) { + Worklist.push_back(&*BI); + ++ValidLoops; + } + else + ++JumboBB; + std::cerr << "BB Size: " << BI->size() << "\n"; } defaultInst = 0; @@ -174,6 +190,7 @@ bool ModuloSchedulingPass::runOnFunction(Function &F) { BE = Worklist.end(); BI != BE; ++BI) { //Print out BB for debugging + DEBUG(std::cerr << "BB Size: " << (*BI)->size() << "\n"); DEBUG(std::cerr << "ModuloScheduling BB: \n"; (*BI)->print(std::cerr)); //Print out LLVM BB @@ -195,6 +212,7 @@ bool ModuloSchedulingPass::runOnFunction(Function &F) { //Write Graph out to file DEBUG(WriteGraphToFile(std::cerr, F.getName(), MSG)); + DEBUG(MSG->print(std::cerr)); //Calculate Resource II int ResMII = calculateResMII(*BI); @@ -204,11 +222,15 @@ bool ModuloSchedulingPass::runOnFunction(Function &F) { DEBUG(std::cerr << "Number of reccurrences found: " << recurrenceList.size() << "\n"); - - - //Our starting initiation interval is the maximum of RecMII and ResMII + if(RecMII < ResMII) + ++RecurrenceConstraint; + else + ++ResourceConstraint; + II = std::max(RecMII, ResMII); + int mII = II; + IISum += mII; //Print out II, RecMII, and ResMII DEBUG(std::cerr << "II starts out as " << II << " ( RecMII=" << RecMII << " and ResMII=" << ResMII << ")\n"); @@ -252,7 +274,7 @@ bool ModuloSchedulingPass::runOnFunction(Function &F) { }); //Finally schedule nodes - bool haveSched = computeSchedule(*BI); + bool haveSched = computeSchedule(*BI, MSG); //Print out final schedule DEBUG(schedule.print(std::cerr)); @@ -363,9 +385,11 @@ bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) { MachineOpCode OC = I->getOpcode(); //Look for calls - if(TMI->isCall(OC)) + if(TMI->isCall(OC)) { + ++LoopsWithCalls; return false; - + } + //Look for conditional move if(OC == V9::MOVRZr || OC == V9::MOVRZi || OC == V9::MOVRLEZr || OC == V9::MOVRLEZi || OC == V9::MOVRLZr || OC == V9::MOVRLZi || OC == V9::MOVRNZr || OC == V9::MOVRNZi @@ -373,8 +397,10 @@ bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) { || OC == V9::MOVRGEZi || OC == V9::MOVLEr || OC == V9::MOVLEi || OC == V9::MOVLEUr || OC == V9::MOVLEUi || OC == V9::MOVFLEr || OC == V9::MOVFLEi || OC == V9::MOVNEr || OC == V9::MOVNEi || OC == V9::MOVNEGr || OC == V9::MOVNEGi - || OC == V9::MOVFNEr || OC == V9::MOVFNEi) + || OC == V9::MOVFNEr || OC == V9::MOVFNEi) { + ++LoopsWithCondMov; return false; + } indexMap[I] = count; @@ -406,14 +432,19 @@ bool ModuloSchedulingPass::MachineBBisValid(const MachineBasicBlock *BI) { if(Instruction *I = dyn_cast(cond)) if(I->getParent() == BB) { - if (!assocIndVar(I, indVar, stack, BB)) + if (!assocIndVar(I, indVar, stack, BB)) { + ++InvalidLoops; return false; + } } - else + else { + ++InvalidLoops; return false; - else + } + else { + ++InvalidLoops; return false; - + } //The indVar set must be >= 3 instructions for this loop to match (FIX ME!) if(indVar.size() < 3 ) return false; @@ -523,7 +554,7 @@ int ModuloSchedulingPass::calculateResMII(const MachineBasicBlock *BI) { //Loop over resources in each cycle and increments their usage count for(unsigned i=0; i < resources.size(); ++i) for(unsigned j=0; j < resources[i].size(); ++j) { - if( resourceUsageCount.find(resources[i][j]) == resourceUsageCount.end()) { + if(!resourceUsageCount.count(resources[i][j])) { resourceUsageCount[resources[i][j]] = 1; } else { @@ -913,67 +944,8 @@ bool ModuloSchedulingPass::circuit(MSchedGraphNode *v, std::vector::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I) { if(*I == s) { //We have a circuit, so add it to our list - - std::vector recc; - //Dump recurrence for now - DEBUG(std::cerr << "Starting Recc\n"); - - int totalDelay = 0; - int totalDistance = 0; - MSchedGraphNode *lastN = 0; - MSchedGraphNode *start = 0; - MSchedGraphNode *end = 0; - - //Loop over recurrence, get delay and distance - for(std::vector::iterator N = stack.begin(), NE = stack.end(); N != NE; ++N) { - totalDelay += (*N)->getLatency(); - if(lastN) { - int iteDiff = (*N)->getInEdge(lastN).getIteDiff(); - totalDistance += iteDiff; - - if(iteDiff > 0) { - start = lastN; - end = *N; - } - } - //Get the original node - lastN = *N; - recc.push_back(newNodes[*N]); - - DEBUG(std::cerr << *lastN << "\n"); - } - - //Get the loop edge - totalDistance += lastN->getIteDiff(*stack.begin()); - - DEBUG(std::cerr << "End Recc\n"); + addRecc(stack, newNodes); f = true; - CircCount++; - - if(start && end) { - //Insert reccurrence into the list - DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n"); - edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start]))); - } - else { - //Insert reccurrence into the list - DEBUG(std::cerr << "Ignore Edge from: " << *lastN << " to " << **stack.begin() << "\n"); - edgesToIgnore.insert(std::make_pair(newNodes[lastN], newNodes[(*stack.begin())]->getInEdgeNum(newNodes[lastN]))); - - } - //Adjust II until we get close to the inequality delay - II*distance <= 0 - int RecMII = II; //Starting value - int value = totalDelay-(RecMII * totalDistance); - int lastII = II; - while(value <= 0) { - - lastII = RecMII; - RecMII--; - value = totalDelay-(RecMII * totalDistance); - } - - recurrenceList.insert(std::make_pair(lastII, recc)); - } else if(!blocked.count(*I)) { if(circuit(*I, stack, blocked, SCC, s, B, II, newNodes)) @@ -1000,6 +972,70 @@ bool ModuloSchedulingPass::circuit(MSchedGraphNode *v, std::vector &stack, std::map &newNodes) { + std::vector recc; + //Dump recurrence for now + DEBUG(std::cerr << "Starting Recc\n"); + + int totalDelay = 0; + int totalDistance = 0; + MSchedGraphNode *lastN = 0; + MSchedGraphNode *start = 0; + MSchedGraphNode *end = 0; + + //Loop over recurrence, get delay and distance + for(std::vector::iterator N = stack.begin(), NE = stack.end(); N != NE; ++N) { + DEBUG(std::cerr << **N << "\n"); + totalDelay += (*N)->getLatency(); + if(lastN) { + int iteDiff = (*N)->getInEdge(lastN).getIteDiff(); + totalDistance += iteDiff; + + if(iteDiff > 0) { + start = lastN; + end = *N; + } + } + //Get the original node + lastN = *N; + recc.push_back(newNodes[*N]); + + + } + + //Get the loop edge + totalDistance += lastN->getIteDiff(*stack.begin()); + + DEBUG(std::cerr << "End Recc\n"); + CircCount++; + + if(start && end) { + //Insert reccurrence into the list + DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n"); + edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start]))); + } + else { + //Insert reccurrence into the list + DEBUG(std::cerr << "Ignore Edge from: " << *lastN << " to " << **stack.begin() << "\n"); + edgesToIgnore.insert(std::make_pair(newNodes[lastN], newNodes[(*stack.begin())]->getInEdgeNum(newNodes[lastN]))); + + } + //Adjust II until we get close to the inequality delay - II*distance <= 0 + int RecMII = II; //Starting value + int value = totalDelay-(RecMII * totalDistance); + int lastII = II; + while(value < 0) { + + lastII = RecMII; + RecMII--; + value = totalDelay-(RecMII * totalDistance); + } + + recurrenceList.insert(std::make_pair(lastII, recc)); + +} + + void ModuloSchedulingPass::findAllCircuits(MSchedGraph *g, int II) { CircCount = 0; @@ -1086,12 +1122,13 @@ void ModuloSchedulingPass::findAllCircuits(MSchedGraph *g, int II) { if(Vk.size() > 1) { circuit(s, stack, blocked, Vk, s, B, II, newNodes); + //Delete nodes from the graph //Find all nodes up to s and delete them std::vector nodesToRemove; nodesToRemove.push_back(s); for(MSchedGraph::iterator N = MSG->begin(), NE = MSG->end(); N != NE; ++N) { if(N->second < s ) - nodesToRemove.push_back(N->second); + nodesToRemove.push_back(N->second); } for(std::vector::iterator N = nodesToRemove.begin(), NE = nodesToRemove.end(); N != NE; ++N) { DEBUG(std::cerr << "Deleting Node: " << **N << "\n"); @@ -1100,7 +1137,7 @@ void ModuloSchedulingPass::findAllCircuits(MSchedGraph *g, int II) { } else break; - } + } DEBUG(std::cerr << "Num Circuits found: " << CircCount << "\n"); } @@ -1253,17 +1290,21 @@ void ModuloSchedulingPass::pathToRecc(MSchedGraphNode *node, void ModuloSchedulingPass::computePartialOrder() { TIME_REGION(X, "calculatePartialOrder"); + + DEBUG(std::cerr << "Computing Partial Order\n"); - //Only push BA branches onto the final node order, we put other branches after it - //FIXME: Should we really be pushing branches on it a specific order instead of relying - //on BA being there? - std::vector branches; + //Only push BA branches onto the final node order, we put other + //branches after it FIXME: Should we really be pushing branches on + //it a specific order instead of relying on BA being there? - //Steps to add a recurrence to the partial order - // 1) Find reccurrence with the highest RecMII. Add it to the partial order. - // 2) For each recurrence with decreasing RecMII, add it to the partial order along with - // any nodes that connect this recurrence to recurrences already in the partial order - for(std::set > >::reverse_iterator + std::vector branches; + + //Steps to add a recurrence to the partial order 1) Find reccurrence + //with the highest RecMII. Add it to the partial order. 2) For each + //recurrence with decreasing RecMII, add it to the partial order + //along with any nodes that connect this recurrence to recurrences + //already in the partial order + for(std::set > >::reverse_iterator I = recurrenceList.rbegin(), E=recurrenceList.rend(); I !=E; ++I) { std::set new_recurrence; @@ -1296,6 +1337,10 @@ void ModuloSchedulingPass::computePartialOrder() { std::vector path; std::set nodesToAdd; + //Dump recc we are dealing with (minus nodes already in PO) + DEBUG(std::cerr << "Recc: "); + DEBUG(for(std::set::iterator R = new_recurrence.begin(), RE = new_recurrence.end(); R != RE; ++R) { std::cerr << **R ; }); + //Add nodes that connect this recurrence to recurrences in the partial path for(std::set::iterator N = new_recurrence.begin(), NE = new_recurrence.end(); N != NE; ++N) @@ -1318,6 +1363,15 @@ void ModuloSchedulingPass::computePartialOrder() { partialOrder.push_back(new_recurrence); + + //Dump out partial order + DEBUG(for(std::vector >::iterator I = partialOrder.begin(), + E = partialOrder.end(); I !=E; ++I) { + std::cerr << "Start set in PO\n"; + for(std::set::iterator J = I->begin(), JE = I->end(); J != JE; ++J) + std::cerr << "PO:" << **J << "\n"; + }); + } } @@ -1649,7 +1703,7 @@ void ModuloSchedulingPass::orderNodes() { //return FinalNodeOrder; } -bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock *BB) { +bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock *BB, MSchedGraph *MSG) { TIME_REGION(X, "computeSchedule"); @@ -1657,7 +1711,7 @@ bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock *BB) { //FIXME: Should be set to max II of the original loop //Cap II in order to prevent infinite loop - int capII = 100; + int capII = MSG->totalDelay(); while(!success) { @@ -1768,8 +1822,7 @@ bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock *BB) { success = scheduleNode(*I, EarlyStart, EarlyStart + II - 1); if(!success) { - ++IncreasedII; - ++II; + ++II; schedule.clear(); break; } @@ -1781,11 +1834,11 @@ bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock *BB) { success = schedule.constructKernel(II, branches, indVarInstrs[BB]); DEBUG(std::cerr << "Done Constructing Schedule Kernel\n"); if(!success) { - ++IncreasedII; ++II; schedule.clear(); } DEBUG(std::cerr << "Final II: " << II << "\n"); + FinalIISum += II; } if(II >= capII) { diff --git a/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h b/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h index 9a7bfe78019..e68948479e1 100644 --- a/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h +++ b/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h @@ -19,6 +19,8 @@ #include "llvm/Pass.h" #include "DependenceAnalyzer.h" #include "llvm/Target/TargetData.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" #include namespace llvm { @@ -107,7 +109,9 @@ namespace llvm { void unblock(MSchedGraphNode *u, std::set &blocked, std::map > &B); - void searchPath(MSchedGraphNode *node, + void addRecc(std::vector &stack, std::map &newNodes); + + void searchPath(MSchedGraphNode *node, std::vector &path, std::set &nodesToAdd); @@ -117,8 +121,8 @@ namespace llvm { void computePartialOrder(); - bool computeSchedule(const MachineBasicBlock *BB); - bool scheduleNode(MSchedGraphNode *node, + bool computeSchedule(const MachineBasicBlock *BB, MSchedGraph *MSG); + bool scheduleNode(MSchedGraphNode *node, int start, int end); void predIntersect(std::set &CurrentSet, std::set &IntersectResult); @@ -148,6 +152,12 @@ namespace llvm { // getAnalysisUsage virtual void getAnalysisUsage(AnalysisUsage &AU) const { + /// HACK: We don't actually need loopinfo or scev, but we have + /// to say we do so that the pass manager does not delete it + /// before we run. + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); } -- 2.34.1