1 //===- ModuloSchedGraph.cpp - Graph datastructure for Modulo Scheduling ---===//
4 //===----------------------------------------------------------------------===//
6 #include "llvm/CodeGen/InstrSelection.h"
7 #include "llvm/Function.h"
8 #include "llvm/Instructions.h"
10 #include "llvm/CodeGen/MachineCodeForInstruction.h"
11 #include "llvm/CodeGen/MachineInstr.h"
12 #include "llvm/Target/TargetSchedInfo.h"
13 #include "Support/StringExtras.h"
14 #include "Support/STLExtras.h"
15 #include "Support/hash_map"
16 #include "Support/Statistic.h"
17 #include "ModuloScheduling.h"
18 #include "ModuloSchedGraph.h"
23 // FIXME: Should be using #include <cmath>
30 //*********************** Internal Data Structures *************************/
32 // The following two types need to be classes, not typedefs, so we can use
33 // opaque declarations in SchedGraph.h
35 struct RefVec:public std::vector<std::pair<ModuloSchedGraphNode*,int> > {
36 typedef std::vector<std::pair<ModuloSchedGraphNode*,
37 int> >::iterator iterator;
38 typedef std::vector<std::pair<ModuloSchedGraphNode*,
39 int> >::const_iterator const_iterator;
42 struct RegToRefVecMap:public hash_map<int,RefVec> {
43 typedef hash_map<int,RefVec>::iterator iterator;
44 typedef hash_map<int,RefVec>::const_iterator const_iterator;
47 struct ValueToDefVecMap:public hash_map<const Instruction*,RefVec> {
48 typedef hash_map<const Instruction*, RefVec>::iterator iterator;
49 typedef hash_map<const Instruction*,
50 RefVec>::const_iterator const_iterator;
53 // class Modulo SchedGraphNode
56 ModuloSchedGraphNode::ModuloSchedGraphNode(unsigned int _nodeId,
57 const BasicBlock * _bb,
58 const Instruction * _inst,
60 const TargetMachine & target)
61 :SchedGraphNodeCommon(_nodeId, _bb, indexInBB), inst(_inst)
64 //FIXME: find the latency
65 //currently setthe latency to zero
70 //class ModuloScheGraph
72 void ModuloSchedGraph::addDummyEdges()
74 assert(graphRoot->outEdges.size() == 0);
76 for (const_iterator I = begin(); I != end(); ++I) {
77 ModuloSchedGraphNode *node = (ModuloSchedGraphNode *) ((*I).second);
78 assert(node != graphRoot && node != graphLeaf);
79 if (node->beginInEdges() == node->endInEdges())
80 (void) new SchedGraphEdge(graphRoot, node, SchedGraphEdge::CtrlDep,
81 SchedGraphEdge::NonDataDep, 0);
82 if (node->beginOutEdges() == node->endOutEdges())
83 (void) new SchedGraphEdge(node, graphLeaf, SchedGraphEdge::CtrlDep,
84 SchedGraphEdge::NonDataDep, 0);
88 bool isDefinition(const Instruction *I)
90 //if(TerminatorInst::classof(I)||FreeInst::classof(I) || StoreInst::classof(I) || CallInst::classof(I))
97 void ModuloSchedGraph::addDefUseEdges(const BasicBlock *bb)
99 //collect def instructions, store them in vector
100 // const TargetInstrInfo& mii = target.getInstrInfo();
101 const TargetInstrInfo & mii = target.getInstrInfo();
103 typedef std::vector < ModuloSchedGraphNode * >DefVec;
106 //find those def instructions
107 for (BasicBlock::const_iterator I = bb->begin(), E = bb->end(); I != E; ++I) {
108 if (I->getType() != Type::VoidTy) {
109 defVec.push_back(this->getGraphNodeForInst(I));
113 for (unsigned int i = 0; i < defVec.size(); i++) {
114 for (Value::use_const_iterator I = defVec[i]->getInst()->use_begin();
115 I != defVec[i]->getInst()->use_end(); I++) {
116 //for each use of a def, add a flow edge from the def instruction to the ref instruction
119 const Instruction *value = defVec[i]->getInst();
120 Instruction *inst = (Instruction *) (*I);
121 ModuloSchedGraphNode *node = NULL;
123 for (BasicBlock::const_iterator I = bb->begin(), E = bb->end();
125 if ((const Instruction *) I == inst) {
126 node = (*this)[inst];
130 assert(inst != NULL && " Use not an Instruction ?");
132 if (node == NULL) //inst is not an instruction in this block
135 // Add a flow edge from the def instruction to the ref instruction
137 // self loop will not happen in SSA form
138 assert(defVec[i] != node && "same node?");
140 // This is a true dependence, so the delay is equal to the delay of the
143 MachineCodeForInstruction & tempMvec =
144 MachineCodeForInstruction::get(value);
145 for (unsigned j = 0; j < tempMvec.size(); j++) {
146 MachineInstr *temp = tempMvec[j];
147 //delay +=mii.minLatency(temp->getOpCode());
148 delay = std::max(delay, mii.minLatency(temp->getOpCode()));
151 SchedGraphEdge *trueEdge =
152 new SchedGraphEdge(defVec[i], node, value,
153 SchedGraphEdge::TrueDep, delay);
155 // if the ref instruction is before the def instrution
156 // then the def instruction must be a phi instruction
157 // add an anti-dependence edge to from the ref instruction to the def
159 if (node->getOrigIndexInBB() < defVec[i]->getOrigIndexInBB()) {
160 assert(PHINode::classof(inst)
161 && "the ref instruction befre def is not PHINode?");
162 trueEdge->setIteDiff(1);
171 void ModuloSchedGraph::addCDEdges(const BasicBlock * bb) {
172 // find the last instruction in the basic block
173 // see if it is an branch instruction.
174 // If yes, then add an edge from each node expcept the last node to the last
176 const Instruction *inst = &(bb->back());
177 ModuloSchedGraphNode *lastNode = (*this)[inst];
178 if (TerminatorInst::classof(inst))
179 for (BasicBlock::const_iterator I = bb->begin(), E = bb->end(); I != E;
182 ModuloSchedGraphNode *node = (*this)[I];
184 (void) new SchedGraphEdge(node, lastNode, SchedGraphEdge::CtrlDep,
185 SchedGraphEdge::NonDataDep, 0);
191 static const int SG_LOAD_REF = 0;
192 static const int SG_STORE_REF = 1;
193 static const int SG_CALL_REF = 2;
195 static const unsigned int SG_DepOrderArray[][3] = {
196 {SchedGraphEdge::NonDataDep,
197 SchedGraphEdge::AntiDep,
198 SchedGraphEdge::AntiDep},
199 {SchedGraphEdge::TrueDep,
200 SchedGraphEdge::OutputDep,
201 SchedGraphEdge::TrueDep | SchedGraphEdge::OutputDep},
202 {SchedGraphEdge::TrueDep,
203 SchedGraphEdge::AntiDep | SchedGraphEdge::OutputDep,
204 SchedGraphEdge::TrueDep | SchedGraphEdge::AntiDep
205 | SchedGraphEdge::OutputDep}
209 // Add a dependence edge between every pair of machine load/store/call
210 // instructions, where at least one is a store or a call.
211 // Use latency 1 just to ensure that memory operations are ordered;
212 // latency does not otherwise matter (true dependences enforce that).
214 void ModuloSchedGraph::addMemEdges(const BasicBlock * bb) {
216 std::vector<ModuloSchedGraphNode*> memNodeVec;
218 //construct the memNodeVec
219 for (BasicBlock::const_iterator I = bb->begin(), E = bb->end(); I != E; ++I) {
220 if (LoadInst::classof(I) || StoreInst::classof(I)
221 || CallInst::classof(I)) {
222 ModuloSchedGraphNode *node = (*this)[(const Instruction *) I];
223 memNodeVec.push_back(node);
227 // Instructions in memNodeVec are in execution order within the basic block,
228 // so simply look at all pairs <memNodeVec[i], memNodeVec[j: j > i]>.
230 for (unsigned im = 0, NM = memNodeVec.size(); im < NM; im++) {
231 const Instruction *fromInst = memNodeVec[im]->getInst();
232 int fromType = CallInst::classof(fromInst) ? SG_CALL_REF
233 : LoadInst::classof(fromInst) ? SG_LOAD_REF : SG_STORE_REF;
234 for (unsigned jm = im + 1; jm < NM; jm++) {
235 const Instruction *toInst = memNodeVec[jm]->getInst();
236 int toType = CallInst::classof(toInst) ? SG_CALL_REF
237 : LoadInst::classof(toInst) ? SG_LOAD_REF : SG_STORE_REF;
238 if (fromType != SG_LOAD_REF || toType != SG_LOAD_REF) {
239 (void) new SchedGraphEdge(memNodeVec[im], memNodeVec[jm],
240 SchedGraphEdge::MemoryDep,
241 SG_DepOrderArray[fromType][toType], 1);
243 SchedGraphEdge *edge =
244 new SchedGraphEdge(memNodeVec[jm], memNodeVec[im],
245 SchedGraphEdge::MemoryDep,
246 SG_DepOrderArray[toType][fromType], 1);
256 void ModuloSchedGraph::buildNodesforBB(const TargetMachine &target,
257 const BasicBlock *bb,
258 std::vector<ModuloSchedGraphNode*> &memNode,
259 RegToRefVecMap ®ToRefVecMap,
260 ValueToDefVecMap &valueToDefVecMap)
262 //const TargetInstrInfo& mii=target.getInstrInfo();
264 //Build graph nodes for each LLVM instruction and gather def/use info.
265 //Do both together in a single pass over all machine instructions.
268 for (BasicBlock::const_iterator I = bb->begin(), E = bb->end(); I != E;
270 ModuloSchedGraphNode *node =
271 new ModuloSchedGraphNode(getNumNodes(), bb, I, i, target);
273 this->noteModuloSchedGraphNodeForInst(I, node);
276 //this function finds some info about instruction in basic block for later use
277 //findDefUseInfoAtInstr(target, node,
278 //memNode,regToRefVecMap,valueToDefVecMap);
282 bool ModuloSchedGraph::isLoop(const BasicBlock *bb) {
283 //only if the last instruction in the basicblock is branch instruction and
284 //there is at least an option to branch itself
286 const Instruction *inst = &(bb->back());
287 if (BranchInst::classof(inst)) {
288 for (unsigned i = 0; i < ((BranchInst *) inst)->getNumSuccessors();
290 BasicBlock *sb = ((BranchInst *) inst)->getSuccessor(i);
300 bool ModuloSchedGraph::isLoop() {
301 //only if the last instruction in the basicblock is branch instruction and
302 //there is at least an option to branch itself
304 assert(bbVec.size() == 1 && "only 1 basicblock in a graph");
305 const BasicBlock *bb = bbVec[0];
306 const Instruction *inst = &(bb->back());
307 if (BranchInst::classof(inst))
308 for (unsigned i = 0; i < ((BranchInst *) inst)->getNumSuccessors();
310 BasicBlock *sb = ((BranchInst *) inst)->getSuccessor(i);
319 void ModuloSchedGraph::computeNodeASAP(const BasicBlock *bb) {
321 //FIXME: now assume the only backward edges come from the edges from other
322 //nodes to the phi Node so i will ignore all edges to the phi node; after
323 //this, there shall be no recurrence.
325 unsigned numNodes = bb->size();
326 for (unsigned i = 2; i < numNodes + 2; i++) {
327 ModuloSchedGraphNode *node = getNode(i);
328 node->setPropertyComputed(false);
331 for (unsigned i = 2; i < numNodes + 2; i++) {
332 ModuloSchedGraphNode *node = getNode(i);
334 if (i == 2 || node->getNumInEdges() == 0) {
335 node->setPropertyComputed(true);
338 for (ModuloSchedGraphNode::const_iterator I = node->beginInEdges(), E =
339 node->endInEdges(); I != E; I++) {
340 SchedGraphEdge *edge = *I;
341 ModuloSchedGraphNode *pred =
342 (ModuloSchedGraphNode *) (edge->getSrc());
343 assert(pred->getPropertyComputed()
344 && "pred node property is not computed!");
346 pred->ASAP + edge->getMinDelay() -
347 edge->getIteDiff() * this->MII;
348 node->ASAP = std::max(node->ASAP, temp);
350 node->setPropertyComputed(true);
354 void ModuloSchedGraph::computeNodeALAP(const BasicBlock *bb) {
355 unsigned numNodes = bb->size();
357 for (unsigned i = numNodes + 1; i >= 2; i--) {
358 ModuloSchedGraphNode *node = getNode(i);
359 node->setPropertyComputed(false);
360 //cerr<< " maxASAP= " <<maxASAP<<" node->ASAP= "<< node->ASAP<<"\n";
361 maxASAP = std::max(maxASAP, node->ASAP);
364 //cerr<<"maxASAP is "<<maxASAP<<"\n";
366 for (unsigned i = numNodes + 1; i >= 2; i--) {
367 ModuloSchedGraphNode *node = getNode(i);
368 node->ALAP = maxASAP;
369 for (ModuloSchedGraphNode::const_iterator I =
370 node->beginOutEdges(), E = node->endOutEdges(); I != E; I++) {
371 SchedGraphEdge *edge = *I;
372 ModuloSchedGraphNode *succ =
373 (ModuloSchedGraphNode *) (edge->getSink());
374 if (PHINode::classof(succ->getInst()))
376 assert(succ->getPropertyComputed()
377 && "succ node property is not computed!");
379 succ->ALAP - edge->getMinDelay() +
380 edge->getIteDiff() * this->MII;
381 node->ALAP = std::min(node->ALAP, temp);
383 node->setPropertyComputed(true);
387 void ModuloSchedGraph::computeNodeMov(const BasicBlock *bb)
389 unsigned numNodes = bb->size();
390 for (unsigned i = 2; i < numNodes + 2; i++) {
391 ModuloSchedGraphNode *node = getNode(i);
392 node->mov = node->ALAP - node->ASAP;
393 assert(node->mov >= 0
394 && "move freedom for this node is less than zero? ");
399 void ModuloSchedGraph::computeNodeDepth(const BasicBlock * bb)
402 unsigned numNodes = bb->size();
403 for (unsigned i = 2; i < numNodes + 2; i++) {
404 ModuloSchedGraphNode *node = getNode(i);
405 node->setPropertyComputed(false);
408 for (unsigned i = 2; i < numNodes + 2; i++) {
409 ModuloSchedGraphNode *node = getNode(i);
411 if (i == 2 || node->getNumInEdges() == 0) {
412 node->setPropertyComputed(true);
415 for (ModuloSchedGraphNode::const_iterator I = node->beginInEdges(), E =
416 node->endInEdges(); I != E; I++) {
417 SchedGraphEdge *edge = *I;
418 ModuloSchedGraphNode *pred =
419 (ModuloSchedGraphNode *) (edge->getSrc());
420 assert(pred->getPropertyComputed()
421 && "pred node property is not computed!");
422 int temp = pred->depth + edge->getMinDelay();
423 node->depth = std::max(node->depth, temp);
425 node->setPropertyComputed(true);
431 void ModuloSchedGraph::computeNodeHeight(const BasicBlock *bb)
433 unsigned numNodes = bb->size();
434 for (unsigned i = numNodes + 1; i >= 2; i--) {
435 ModuloSchedGraphNode *node = getNode(i);
436 node->setPropertyComputed(false);
439 for (unsigned i = numNodes + 1; i >= 2; i--) {
440 ModuloSchedGraphNode *node = getNode(i);
442 for (ModuloSchedGraphNode::const_iterator I =
443 node->beginOutEdges(), E = node->endOutEdges(); I != E; ++I) {
444 SchedGraphEdge *edge = *I;
445 ModuloSchedGraphNode *succ =
446 (ModuloSchedGraphNode *) (edge->getSink());
447 if (PHINode::classof(succ->getInst()))
449 assert(succ->getPropertyComputed()
450 && "succ node property is not computed!");
451 node->height = std::max(node->height, succ->height + edge->getMinDelay());
454 node->setPropertyComputed(true);
460 void ModuloSchedGraph::computeNodeProperty(const BasicBlock * bb)
462 //FIXME: now assume the only backward edges come from the edges from other
463 //nodes to the phi Node so i will ignore all edges to the phi node; after
464 //this, there shall be no recurrence.
466 this->computeNodeASAP(bb);
467 this->computeNodeALAP(bb);
468 this->computeNodeMov(bb);
469 this->computeNodeDepth(bb);
470 this->computeNodeHeight(bb);
474 //do not consider the backedge in these two functions:
475 // i.e. don't consider the edge with destination in phi node
476 std::vector<ModuloSchedGraphNode*>
477 ModuloSchedGraph::predSet(std::vector<ModuloSchedGraphNode*> set,
478 unsigned backEdgeSrc,
482 std::vector<ModuloSchedGraphNode*> predS;
483 for (unsigned i = 0; i < set.size(); i++) {
484 ModuloSchedGraphNode *node = set[i];
485 for (ModuloSchedGraphNode::const_iterator I = node->beginInEdges(), E =
486 node->endInEdges(); I != E; I++) {
487 SchedGraphEdge *edge = *I;
488 if (edge->getSrc()->getNodeId() == backEdgeSrc
489 && edge->getSink()->getNodeId() == backEdgeSink)
491 ModuloSchedGraphNode *pred =
492 (ModuloSchedGraphNode *) (edge->getSrc());
493 //if pred is not in the predSet, push it in vector
494 bool alreadyInset = false;
495 for (unsigned j = 0; j < predS.size(); ++j)
496 if (predS[j]->getNodeId() == pred->getNodeId()) {
501 for (unsigned j = 0; j < set.size(); ++j)
502 if (set[j]->getNodeId() == pred->getNodeId()) {
508 predS.push_back(pred);
514 ModuloSchedGraph::NodeVec ModuloSchedGraph::predSet(NodeVec set)
516 //node number increases from 2
517 return predSet(set, 0, 0);
520 std::vector <ModuloSchedGraphNode*>
521 ModuloSchedGraph::predSet(ModuloSchedGraphNode *_node,
522 unsigned backEdgeSrc, unsigned backEdgeSink)
524 std::vector<ModuloSchedGraphNode*> set;
525 set.push_back(_node);
526 return predSet(set, backEdgeSrc, backEdgeSink);
529 std::vector <ModuloSchedGraphNode*>
530 ModuloSchedGraph::predSet(ModuloSchedGraphNode * _node)
532 return predSet(_node, 0, 0);
535 std::vector<ModuloSchedGraphNode*>
536 ModuloSchedGraph::succSet(std::vector<ModuloSchedGraphNode*> set,
537 unsigned src, unsigned sink)
539 std::vector<ModuloSchedGraphNode*> succS;
540 for (unsigned i = 0; i < set.size(); i++) {
541 ModuloSchedGraphNode *node = set[i];
542 for (ModuloSchedGraphNode::const_iterator I =
543 node->beginOutEdges(), E = node->endOutEdges(); I != E; I++) {
544 SchedGraphEdge *edge = *I;
545 if (edge->getSrc()->getNodeId() == src
546 && edge->getSink()->getNodeId() == sink)
548 ModuloSchedGraphNode *succ =
549 (ModuloSchedGraphNode *) (edge->getSink());
550 //if pred is not in the predSet, push it in vector
551 bool alreadyInset = false;
552 for (unsigned j = 0; j < succS.size(); j++)
553 if (succS[j]->getNodeId() == succ->getNodeId()) {
558 for (unsigned j = 0; j < set.size(); j++)
559 if (set[j]->getNodeId() == succ->getNodeId()) {
564 succS.push_back(succ);
570 ModuloSchedGraph::NodeVec ModuloSchedGraph::succSet(NodeVec set)
572 return succSet(set, 0, 0);
576 std::vector<ModuloSchedGraphNode*>
577 ModuloSchedGraph::succSet(ModuloSchedGraphNode *_node,
578 unsigned src, unsigned sink)
580 std::vector<ModuloSchedGraphNode*>set;
581 set.push_back(_node);
582 return succSet(set, src, sink);
585 std::vector<ModuloSchedGraphNode*>
586 ModuloSchedGraph::succSet(ModuloSchedGraphNode * _node)
588 return succSet(_node, 0, 0);
591 SchedGraphEdge *ModuloSchedGraph::getMaxDelayEdge(unsigned srcId,
594 ModuloSchedGraphNode *node = getNode(srcId);
595 SchedGraphEdge *maxDelayEdge = NULL;
597 for (ModuloSchedGraphNode::const_iterator I = node->beginOutEdges(), E =
598 node->endOutEdges(); I != E; I++) {
599 SchedGraphEdge *edge = *I;
600 if (edge->getSink()->getNodeId() == sinkId)
601 if (edge->getMinDelay() > maxDelay) {
603 maxDelay = edge->getMinDelay();
606 assert(maxDelayEdge != NULL && "no edge between the srcId and sinkId?");
610 void ModuloSchedGraph::dumpCircuits()
612 DEBUG(std::cerr << "dumping circuits for graph:\n");
614 while (circuits[++j][0] != 0) {
616 while (circuits[j][++k] != 0)
617 DEBUG(std::cerr << circuits[j][k] << "\t");
618 DEBUG(std::cerr << "\n");
622 void ModuloSchedGraph::dumpSet(std::vector < ModuloSchedGraphNode * >set)
624 for (unsigned i = 0; i < set.size(); i++)
625 DEBUG(std::cerr << set[i]->getNodeId() << "\t");
626 DEBUG(std::cerr << "\n");
629 std::vector<ModuloSchedGraphNode*>
630 ModuloSchedGraph::vectorUnion(std::vector<ModuloSchedGraphNode*> set1,
631 std::vector<ModuloSchedGraphNode*> set2)
633 std::vector<ModuloSchedGraphNode*> unionVec;
634 for (unsigned i = 0; i < set1.size(); i++)
635 unionVec.push_back(set1[i]);
636 for (unsigned j = 0; j < set2.size(); j++) {
638 for (unsigned i = 0; i < unionVec.size(); i++)
639 if (set2[j] == unionVec[i])
642 unionVec.push_back(set2[j]);
647 std::vector<ModuloSchedGraphNode*>
648 ModuloSchedGraph::vectorConj(std::vector<ModuloSchedGraphNode*> set1,
649 std::vector<ModuloSchedGraphNode*> set2)
651 std::vector<ModuloSchedGraphNode*> conjVec;
652 for (unsigned i = 0; i < set1.size(); i++)
653 for (unsigned j = 0; j < set2.size(); j++)
654 if (set1[i] == set2[j])
655 conjVec.push_back(set1[i]);
659 ModuloSchedGraph::NodeVec ModuloSchedGraph::vectorSub(NodeVec set1,
663 for (NodeVec::iterator I = set1.begin(); I != set1.end(); I++) {
665 for (NodeVec::iterator II = set2.begin(); II != set2.end(); II++)
666 if ((*I)->getNodeId() == (*II)->getNodeId()) {
671 newVec.push_back(*I);
676 void ModuloSchedGraph::orderNodes() {
679 std::vector < ModuloSchedGraphNode * >set;
680 const BasicBlock *bb = bbVec[0];
681 unsigned numNodes = bb->size();
683 // first order all the sets
687 while (circuits[++j][0] != 0) {
689 preDelay = totalDelay;
691 while (circuits[j][++k] != 0) {
692 ModuloSchedGraphNode *node = getNode(circuits[j][k]);
695 circuits[j][k + 1] != 0 ? circuits[j][k + 1] : circuits[j][0];
696 SchedGraphEdge *edge = getMaxDelayEdge(circuits[j][k], nextNodeId);
697 totalDelay += edge->getMinDelay();
699 if (preDelay != -1 && totalDelay > preDelay) {
700 // swap circuits[j][] and cuicuits[j-1][]
701 unsigned temp[MAXNODE];
702 for (int k = 0; k < MAXNODE; k++) {
703 temp[k] = circuits[j - 1][k];
704 circuits[j - 1][k] = circuits[j][k];
705 circuits[j][k] = temp[k];
712 // build the first set
715 if (ModuloScheduling::printScheduleProcess())
716 DEBUG(std::cerr << "building the first set" << "\n");
720 while (circuits[setSeq][++k] != 0)
721 set.push_back(getNode(circuits[setSeq][k]));
722 if (circuits[setSeq][0] != 0) {
723 backEdgeSrc = circuits[setSeq][k - 1];
724 backEdgeSink = circuits[setSeq][0];
726 if (ModuloScheduling::printScheduleProcess()) {
727 DEBUG(std::cerr << "the first set is:");
730 // implement the ordering algorithm
731 enum OrderSeq { bottom_up, top_down };
733 std::vector<ModuloSchedGraphNode*> R;
734 while (!set.empty()) {
735 std::vector<ModuloSchedGraphNode*> pset = predSet(oNodes);
736 std::vector<ModuloSchedGraphNode*> sset = succSet(oNodes);
738 if (!pset.empty() && !vectorConj(pset, set).empty()) {
739 R = vectorConj(pset, set);
741 } else if (!sset.empty() && !vectorConj(sset, set).empty()) {
742 R = vectorConj(sset, set);
747 for (unsigned i = 0; i < set.size(); i++) {
748 int temp = set[i]->getASAP();
749 if (temp > maxASAP) {
754 R.push_back(set[position]);
759 if (order == top_down) {
760 if (ModuloScheduling::printScheduleProcess())
761 DEBUG(std::cerr << "in top_down round\n");
764 NodeVec::iterator chosenI;
765 for (NodeVec::iterator I = R.begin(); I != R.end(); I++) {
766 int temp = (*I)->height;
767 if ((temp > maxHeight)
768 || (temp == maxHeight && (*I)->mov <= (*chosenI)->mov)) {
770 if ((temp > maxHeight)
771 || (temp == maxHeight && (*I)->mov < (*chosenI)->mov)) {
776 //possible case: instruction A and B has the same height and mov,
777 //but A has dependence to B e.g B is the branch instruction in the
778 //end, or A is the phi instruction at the beginning
779 if ((*I)->mov == (*chosenI)->mov)
780 for (ModuloSchedGraphNode::const_iterator oe =
781 (*I)->beginOutEdges(), end = (*I)->endOutEdges();
783 if ((*oe)->getSink() == (*chosenI)) {
791 ModuloSchedGraphNode *mu = *chosenI;
792 oNodes.push_back(mu);
794 std::vector<ModuloSchedGraphNode*> succ_mu =
795 succSet(mu, backEdgeSrc, backEdgeSink);
796 std::vector<ModuloSchedGraphNode*> comm =
797 vectorConj(succ_mu, set);
798 comm = vectorSub(comm, oNodes);
799 R = vectorUnion(comm, R);
802 R = vectorConj(predSet(oNodes), set);
804 if (ModuloScheduling::printScheduleProcess())
805 DEBUG(std::cerr << "in bottom up round\n");
808 NodeVec::iterator chosenI;
809 for (NodeVec::iterator I = R.begin(); I != R.end(); I++) {
810 int temp = (*I)->depth;
811 if ((temp > maxDepth)
812 || (temp == maxDepth && (*I)->mov < (*chosenI)->mov)) {
817 ModuloSchedGraphNode *mu = *chosenI;
818 oNodes.push_back(mu);
820 std::vector<ModuloSchedGraphNode*> pred_mu =
821 predSet(mu, backEdgeSrc, backEdgeSink);
822 std::vector<ModuloSchedGraphNode*> comm =
823 vectorConj(pred_mu, set);
824 comm = vectorSub(comm, oNodes);
825 R = vectorUnion(comm, R);
828 R = vectorConj(succSet(oNodes), set);
831 if (ModuloScheduling::printScheduleProcess()) {
832 DEBUG(std::cerr << "order finished\n");
833 DEBUG(std::cerr << "dumping the ordered nodes:\n");
838 //FIXME: the nodes between onodes and this circuit should also be include in
840 if (ModuloScheduling::printScheduleProcess())
841 DEBUG(std::cerr << "building the next set\n");
845 while (circuits[setSeq][++k] != 0)
846 set.push_back(getNode(circuits[setSeq][k]));
847 if (circuits[setSeq][0] != 0) {
848 backEdgeSrc = circuits[setSeq][k - 1];
849 backEdgeSink = circuits[setSeq][0];
852 //no circuits any more
853 //collect all other nodes
854 if (ModuloScheduling::printScheduleProcess())
855 DEBUG(std::cerr << "no circuits any more, collect the rest nodes\n");
856 for (unsigned i = 2; i < numNodes + 2; i++) {
858 for (unsigned j = 0; j < oNodes.size(); j++)
859 if (oNodes[j]->getNodeId() == i) {
864 set.push_back(getNode(i));
867 if (ModuloScheduling::printScheduleProcess()) {
868 DEBUG(std::cerr << "next set is:\n");
871 } //while(!set.empty())
877 void ModuloSchedGraph::buildGraph(const TargetMachine & target)
879 const BasicBlock *bb = bbVec[0];
881 assert(bbVec.size() == 1 && "only handling a single basic block here");
883 // Use this data structure to note all machine operands that compute
884 // ordinary LLVM values. These must be computed defs (i.e., instructions).
885 // Note that there may be multiple machine instructions that define
887 ValueToDefVecMap valueToDefVecMap;
889 // Use this data structure to note all memory instructions.
890 // We use this to add memory dependence edges without a second full walk.
892 // vector<const Instruction*> memVec;
893 std::vector<ModuloSchedGraphNode*> memNodeVec;
895 // Use this data structure to note any uses or definitions of
896 // machine registers so we can add edges for those later without
897 // extra passes over the nodes.
898 // The vector holds an ordered list of references to the machine reg,
899 // ordered according to control-flow order. This only works for a
900 // single basic block, hence the assertion. Each reference is identified
901 // by the pair: <node, operand-number>.
903 RegToRefVecMap regToRefVecMap;
905 // Make a dummy root node. We'll add edges to the real roots later.
906 graphRoot = new ModuloSchedGraphNode(0, NULL, NULL, -1, target);
907 graphLeaf = new ModuloSchedGraphNode(1, NULL, NULL, -1, target);
909 //----------------------------------------------------------------
910 // First add nodes for all the LLVM instructions in the basic block because
911 // this greatly simplifies identifying which edges to add. Do this one VM
912 // instruction at a time since the ModuloSchedGraphNode needs that.
913 // Also, remember the load/store instructions to add memory deps later.
914 //----------------------------------------------------------------
916 //FIXME:if there is call instruction, then we shall quit somewhere
917 // currently we leave call instruction and continue construct graph
919 //dump only the blocks which are from loops
922 if (ModuloScheduling::printScheduleProcess())
926 DEBUG(std::cerr << " dumping non-loop BB:\n");
930 buildNodesforBB(target, bb, memNodeVec, regToRefVecMap,
933 this->addDefUseEdges(bb);
934 this->addCDEdges(bb);
935 this->addMemEdges(bb);
939 int ResII = this->computeResII(bb);
940 if (ModuloScheduling::printScheduleProcess())
941 DEBUG(std::cerr << "ResII is " << ResII << "\n");
942 int RecII = this->computeRecII(bb);
943 if (ModuloScheduling::printScheduleProcess())
944 DEBUG(std::cerr << "RecII is " << RecII << "\n");
946 this->MII = std::max(ResII, RecII);
948 this->computeNodeProperty(bb);
949 if (ModuloScheduling::printScheduleProcess())
950 this->dumpNodeProperty();
954 if (ModuloScheduling::printScheduleProcess())
956 //this->instrScheduling();
958 //this->dumpScheduling();
962 ModuloSchedGraphNode *ModuloSchedGraph::getNode(const unsigned nodeId) const
964 for (const_iterator I = begin(), E = end(); I != E; I++)
965 if ((*I).second->getNodeId() == nodeId)
966 return (ModuloSchedGraphNode *) (*I).second;
970 int ModuloSchedGraph::computeRecII(const BasicBlock *bb)
975 //os<<"begining computerRecII()"<<"\n";
977 //FIXME: only deal with circuits starting at the first node: the phi node
980 //search all elementary circuits in the dependance graph
981 //assume maximum number of nodes is MAXNODE
983 unsigned path[MAXNODE];
984 unsigned stack[MAXNODE][MAXNODE];
986 for (int j = 0; j < MAXNODE; j++) {
988 for (int k = 0; k < MAXNODE; k++)
991 //in our graph, the node number starts at 2
993 //number of nodes, because each instruction will result in one node
994 const unsigned numNodes = bb->size();
999 ModuloSchedGraphNode *initNode = getNode(path[0]);
1000 unsigned initNodeId = initNode->getNodeId();
1001 ModuloSchedGraphNode *currentNode = initNode;
1003 while (currentNode != NULL) {
1004 unsigned currentNodeId = currentNode->getNodeId();
1005 // DEBUG(std::cerr<<"current node is "<<currentNodeId<<"\n");
1007 ModuloSchedGraphNode *nextNode = NULL;
1008 for (ModuloSchedGraphNode::const_iterator I =
1009 currentNode->beginOutEdges(), E = currentNode->endOutEdges();
1011 //DEBUG(std::cerr <<" searching in outgoint edges of node
1012 //"<<currentNodeId<<"\n";
1013 unsigned nodeId = ((SchedGraphEdge *) * I)->getSink()->getNodeId();
1014 bool inpath = false, instack = false;
1017 //DEBUG(std::cerr<<"nodeId is "<<nodeId<<"\n");
1020 while (path[++k] != 0)
1021 if (nodeId == path[k]) {
1027 while (stack[i][++k] != 0)
1028 if (nodeId == stack[i][k]) {
1033 if (nodeId > currentNodeId && !inpath && !instack) {
1035 (ModuloSchedGraphNode *) ((SchedGraphEdge *) * I)->getSink();
1040 if (nextNode != NULL) {
1041 //DEBUG(std::cerr<<"find the next Node "<<nextNode->getNodeId()<<"\n");
1044 while (stack[i][j] != 0)
1046 stack[i][j] = nextNode->getNodeId();
1049 path[i] = nextNode->getNodeId();
1050 currentNode = nextNode;
1052 //DEBUG(std::cerr<<"no expansion any more"<<"\n");
1054 for (ModuloSchedGraphNode::const_iterator I =
1055 currentNode->beginOutEdges(), E = currentNode->endOutEdges();
1057 unsigned nodeId = ((SchedGraphEdge *) * I)->getSink()->getNodeId();
1058 if (nodeId == initNodeId) {
1061 while (circuits[++j][0] != 0);
1062 for (int k = 0; k < MAXNODE; k++)
1063 circuits[j][k] = path[k];
1067 //remove this node in the path and clear the corresponding entries in the
1071 for (j = 0; j < MAXNODE; j++)
1074 currentNode = getNode(path[i]);
1078 if (ModuloScheduling::printScheduleProcess())
1079 DEBUG(std::cerr << "circuits found are:\n");
1081 while (circuits[++j][0] != 0) {
1083 while (circuits[j][++k] != 0)
1084 if (ModuloScheduling::printScheduleProcess())
1085 DEBUG(std::cerr << circuits[j][k] << "\t");
1086 if (ModuloScheduling::printScheduleProcess())
1087 DEBUG(std::cerr << "\n");
1089 //for this circuit, compute the sum of all edge delay
1092 while (circuits[j][++k] != 0) {
1093 //ModuloSchedGraphNode* node =getNode(circuits[j][k]);
1094 unsigned nextNodeId;
1096 circuits[j][k + 1] !=
1097 0 ? circuits[j][k + 1] : circuits[j][0];
1100 for(ModuloSchedGraphNode::const_iterator I=node->beginOutEdges(),
1101 E=node->endOutEdges();I !=E; I++)
1103 SchedGraphEdge* edge= *I;
1104 if(edge->getSink()->getNodeId() == nextNodeId)
1105 {sumDelay += edge->getMinDelay();break;}
1110 getMaxDelayEdge(circuits[j][k], nextNodeId)->getMinDelay();
1113 // assume we have distance 1, in this case the sumDelay is RecII
1114 // this is correct for SSA form only
1116 if (ModuloScheduling::printScheduleProcess())
1117 DEBUG(std::cerr << "The total Delay in the circuit is " << sumDelay
1120 RecII = RecII > sumDelay ? RecII : sumDelay;
1131 void ModuloSchedGraph::addResourceUsage(std::vector<std::pair<int,int> > &ruVec,
1134 //DEBUG(std::cerr<<"\nadding a resouce , current resouceUsage vector size is
1135 //"<<ruVec.size()<<"\n";
1136 bool alreadyExists = false;
1137 for (unsigned i = 0; i < ruVec.size(); i++) {
1138 if (rid == ruVec[i].first) {
1140 alreadyExists = true;
1145 ruVec.push_back(std::make_pair(rid, 1));
1146 //DEBUG(std::cerr<<"current resouceUsage vector size is "<<ruVec.size()<<"\n";
1149 void ModuloSchedGraph::dumpResourceUsage(std::vector<std::pair<int,int> > &ru)
1151 TargetSchedInfo & msi = (TargetSchedInfo &) target.getSchedInfo();
1153 std::vector<std::pair<int,int> > resourceNumVector = msi.resourceNumVector;
1154 DEBUG(std::cerr << "resourceID\t" << "resourceNum\n");
1155 for (unsigned i = 0; i < resourceNumVector.size(); i++)
1156 DEBUG(std::cerr << resourceNumVector[i].
1157 first << "\t" << resourceNumVector[i].second << "\n");
1159 DEBUG(std::cerr << " maxNumIssueTotal(issue slot in one cycle) = " << msi.
1160 maxNumIssueTotal << "\n");
1161 DEBUG(std::cerr << "resourceID\t resourceUsage\t ResourceNum\n");
1162 for (unsigned i = 0; i < ru.size(); i++) {
1163 DEBUG(std::cerr << ru[i].first << "\t" << ru[i].second);
1164 const unsigned resNum = msi.getCPUResourceNum(ru[i].first);
1165 DEBUG(std::cerr << "\t" << resNum << "\n");
1169 int ModuloSchedGraph::computeResII(const BasicBlock * bb)
1172 const TargetInstrInfo & mii = target.getInstrInfo();
1173 const TargetSchedInfo & msi = target.getSchedInfo();
1176 std::vector<std::pair<int,int> > resourceUsage;
1177 //pair<int resourceid, int resourceUsageTimes_in_the_whole_block>
1179 //FIXME: number of functional units the target machine can provide should be
1180 //get from Target here I temporary hardcode it
1182 for (BasicBlock::const_iterator I = bb->begin(), E = bb->end(); I != E;
1184 if (ModuloScheduling::printScheduleProcess()) {
1185 DEBUG(std::cerr << "machine instruction for llvm instruction( node " <<
1186 getGraphNodeForInst(I)->getNodeId() << ")\n");
1187 DEBUG(std::cerr << "\t" << *I);
1189 MachineCodeForInstruction & tempMvec =
1190 MachineCodeForInstruction::get(I);
1191 if (ModuloScheduling::printScheduleProcess())
1192 DEBUG(std::cerr << "size =" << tempMvec.size() << "\n");
1193 for (unsigned i = 0; i < tempMvec.size(); i++) {
1194 MachineInstr *minstr = tempMvec[i];
1196 unsigned minDelay = mii.minLatency(minstr->getOpCode());
1197 InstrRUsage rUsage = msi.getInstrRUsage(minstr->getOpCode());
1198 InstrClassRUsage classRUsage =
1199 msi.getClassRUsage(mii.getSchedClass(minstr->getOpCode()));
1200 unsigned totCycles = classRUsage.totCycles;
1202 std::vector<std::vector<resourceId_t> > resources=rUsage.resourcesByCycle;
1203 assert(totCycles == resources.size());
1204 if (ModuloScheduling::printScheduleProcess())
1205 DEBUG(std::cerr << "resources Usage for this Instr(totCycles="
1206 << totCycles << ",mindLatency="
1207 << mii.minLatency(minstr->getOpCode()) << "): " << *minstr
1209 for (unsigned j = 0; j < resources.size(); j++) {
1210 if (ModuloScheduling::printScheduleProcess())
1211 DEBUG(std::cerr << "cycle " << j << ": ");
1212 for (unsigned k = 0; k < resources[j].size(); k++) {
1213 if (ModuloScheduling::printScheduleProcess())
1214 DEBUG(std::cerr << "\t" << resources[j][k]);
1215 addResourceUsage(resourceUsage, resources[j][k]);
1217 if (ModuloScheduling::printScheduleProcess())
1218 DEBUG(std::cerr << "\n");
1222 if (ModuloScheduling::printScheduleProcess())
1223 this->dumpResourceUsage(resourceUsage);
1227 int issueSlots = msi.maxNumIssueTotal;
1228 for (unsigned i = 0; i < resourceUsage.size(); i++) {
1229 int resourceNum = msi.getCPUResourceNum(resourceUsage[i].first);
1230 int useNum = resourceUsage[i].second;
1232 if (resourceNum <= issueSlots)
1233 tempII = ceil(1.0 * useNum / resourceNum);
1235 tempII = ceil(1.0 * useNum / issueSlots);
1236 ResII = std::max((int) tempII, ResII);
1241 ModuloSchedGraphSet::ModuloSchedGraphSet(const Function *function,
1242 const TargetMachine &target)
1245 buildGraphsForMethod(method, target);
1249 ModuloSchedGraphSet::~ModuloSchedGraphSet()
1251 //delete all the graphs
1252 for (iterator I = begin(), E = end(); I != E; ++I)
1256 void ModuloSchedGraphSet::dump() const
1258 DEBUG(std::cerr << " ====== ModuloSched graphs for function `" <<
1259 method->getName() << "' =========\n\n");
1260 for (const_iterator I = begin(); I != end(); ++I)
1263 DEBUG(std::cerr << "\n=========End graphs for function `" << method->getName()
1264 << "' ==========\n\n");
1267 void ModuloSchedGraph::dump(const BasicBlock * bb)
1269 DEBUG(std::cerr << "dumping basic block:");
1270 DEBUG(std::cerr << (bb->hasName()? bb->getName() : "block")
1271 << " (" << bb << ")" << "\n");
1275 void ModuloSchedGraph::dump(const BasicBlock * bb, std::ostream & os)
1277 os << "dumping basic block:";
1278 os << (bb->hasName()? bb->getName() : "block")
1279 << " (" << bb << ")" << "\n";
1282 void ModuloSchedGraph::dump() const
1284 DEBUG(std::cerr << " ModuloSchedGraph for basic Blocks:");
1285 for (unsigned i = 0, N = bbVec.size(); i < N; i++) {
1286 DEBUG(std::cerr << (bbVec[i]->hasName()? bbVec[i]->getName() : "block")
1287 << " (" << bbVec[i] << ")" << ((i == N - 1) ? "" : ", "));
1290 DEBUG(std::cerr << "\n\n Actual Root nodes : ");
1291 for (unsigned i = 0, N = graphRoot->outEdges.size(); i < N; i++)
1292 DEBUG(std::cerr << graphRoot->outEdges[i]->getSink()->getNodeId()
1293 << ((i == N - 1) ? "" : ", "));
1295 DEBUG(std::cerr << "\n Graph Nodes:\n");
1296 //for (const_iterator I=begin(); I != end(); ++I)
1297 //DEBUG(std::cerr << "\n" << *I->second;
1298 unsigned numNodes = bbVec[0]->size();
1299 for (unsigned i = 2; i < numNodes + 2; i++) {
1300 ModuloSchedGraphNode *node = getNode(i);
1301 DEBUG(std::cerr << "\n" << *node);
1304 DEBUG(std::cerr << "\n");
1307 void ModuloSchedGraph::dumpNodeProperty() const
1309 const BasicBlock *bb = bbVec[0];
1310 unsigned numNodes = bb->size();
1311 for (unsigned i = 2; i < numNodes + 2; i++) {
1312 ModuloSchedGraphNode *node = getNode(i);
1313 DEBUG(std::cerr << "NodeId " << node->getNodeId() << "\t");
1314 DEBUG(std::cerr << "ASAP " << node->getASAP() << "\t");
1315 DEBUG(std::cerr << "ALAP " << node->getALAP() << "\t");
1316 DEBUG(std::cerr << "mov " << node->getMov() << "\t");
1317 DEBUG(std::cerr << "depth " << node->getDepth() << "\t");
1318 DEBUG(std::cerr << "height " << node->getHeight() << "\t\n");
1322 void ModuloSchedGraphSet::buildGraphsForMethod(const Function * F,
1323 const TargetMachine &
1326 for (Function::const_iterator BI = F->begin(); BI != F->end(); ++BI)
1327 addGraph(new ModuloSchedGraph(BI, target));
1330 std::ostream & operator<<(std::ostream & os,
1331 const ModuloSchedGraphNode & node)
1333 os << std::string(8, ' ')
1334 << "Node " << node.nodeId << " : "
1335 << "latency = " << node.latency << "\n" << std::string(12, ' ');
1337 if (node.getInst() == NULL)
1338 os << "(Dummy node)\n";
1340 os << *node.getInst() << "\n" << std::string(12, ' ');
1341 os << node.inEdges.size() << " Incoming Edges:\n";
1342 for (unsigned i = 0, N = node.inEdges.size(); i < N; i++)
1343 os << std::string(16, ' ') << *node.inEdges[i];
1345 os << std::string(12, ' ') << node.outEdges.size()
1346 << " Outgoing Edges:\n";
1347 for (unsigned i = 0, N = node.outEdges.size(); i < N; i++)
1348 os << std::string(16, ' ') << *node.outEdges[i];