From c5b4632c27566de7c3fc6679b43d7da4f4a29666 Mon Sep 17 00:00:00 2001 From: "Vikram S. Adve" Date: Sun, 30 Sep 2001 23:43:34 +0000 Subject: [PATCH] Bug fixes: (1) Ensure that delay slot instructions are not moved out of place (this was happening for some CALL instructions). Basically, we need to move all delay slot instructions out of the graph and handle them along with the delayed control transfer instruction. (2) Mark scheduled instructions correctly when instructions are scheduled in more than one cycle in a single step (due to delay slots). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@678 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/InstrSched/InstrScheduling.cpp | 809 ++++++++++-------- .../SparcV9/InstrSched/InstrScheduling.cpp | 809 ++++++++++-------- 2 files changed, 860 insertions(+), 758 deletions(-) diff --git a/lib/CodeGen/InstrSched/InstrScheduling.cpp b/lib/CodeGen/InstrSched/InstrScheduling.cpp index 34802c9e5bb..0b194207ca4 100644 --- a/lib/CodeGen/InstrSched/InstrScheduling.cpp +++ b/lib/CodeGen/InstrSched/InstrScheduling.cpp @@ -9,16 +9,26 @@ // 7/23/01 - Vikram Adve - Created //**************************************************************************/ + +//************************* User Include Files *****************************/ + #include "llvm/CodeGen/InstrScheduling.h" #include "SchedPriorities.h" #include "llvm/Analysis/LiveVar/BBLiveVar.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Instruction.h" + + +//************************ System Include Files *****************************/ + #include #include #include + +//************************* External Data Types *****************************/ + cl::Enum SchedDebugLevel("dsched", cl::NoFlags, "enable instruction scheduling debugging information", clEnumValN(Sched_NoDebugInfo, "n", "disable debug output"), @@ -27,55 +37,12 @@ cl::Enum SchedDebugLevel("dsched", cl::NoFlags, clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 0); +//************************* Internal Data Types *****************************/ + class InstrSchedule; class SchedulingManager; class DelaySlotInfo; -static void ForwardListSchedule (SchedulingManager& S); - -static void RecordSchedule (const BasicBlock* bb, - const SchedulingManager& S); - -static unsigned ChooseOneGroup (SchedulingManager& S); - -static void MarkSuccessorsReady (SchedulingManager& S, - const SchedGraphNode* node); - -static unsigned FindSlotChoices (SchedulingManager& S, - DelaySlotInfo*& getDelaySlotInfo); - -static void AssignInstructionsToSlots(class SchedulingManager& S, - unsigned maxIssue); - -static void ScheduleInstr (class SchedulingManager& S, - const SchedGraphNode* node, - unsigned int slotNum, - cycles_t curTime); - -static bool ViolatesMinimumGap (const SchedulingManager& S, - MachineOpCode opCode, - const cycles_t inCycle); - -static bool ConflictsWithChoices (const SchedulingManager& S, - MachineOpCode opCode); - -static void ChooseInstructionsForDelaySlots(SchedulingManager& S, - const BasicBlock* bb, - SchedGraph* graph); - -static bool NodeCanFillDelaySlot (const SchedulingManager& S, - const SchedGraphNode* node, - const SchedGraphNode* brNode, - bool nodeIsPredecessor); - -static void MarkNodeForDelaySlot (SchedulingManager& S, - SchedGraph* graph, - SchedGraphNode* node, - const SchedGraphNode* brNode, - bool nodeIsPredecessor); - -//************************* Internal Data Types *****************************/ - //---------------------------------------------------------------------- // class InstrGroup: @@ -369,7 +336,7 @@ public: delayedNodeSlotNum = slotNum; } - void scheduleDelayedNode (SchedulingManager& S); + unsigned scheduleDelayedNode (SchedulingManager& S); }; @@ -539,7 +506,7 @@ public: if (createIfMissing) { dinfo = new DelaySlotInfo(bn, - getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode())); + getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode())); delaySlotInfoForBranches[bn] = dinfo; } else @@ -608,159 +575,63 @@ SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node, } } -//************************* External Functions *****************************/ - +//************************* Internal Functions *****************************/ -//--------------------------------------------------------------------------- -// Function: ScheduleInstructionsWithSSA -// -// Purpose: -// Entry point for instruction scheduling on SSA form. -// Schedules the machine instructions generated by instruction selection. -// Assumes that register allocation has not been done, i.e., operands -// are still in SSA form. -//--------------------------------------------------------------------------- -bool -ScheduleInstructionsWithSSA(Method* method, - const TargetMachine &target) +static void +AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) { - SchedGraphSet graphSet(method, target); - - if (SchedDebugLevel >= Sched_PrintSchedGraphs) - { - cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING" - << endl; - graphSet.dump(); - } - - for (SchedGraphSet::const_iterator GI=graphSet.begin(); - GI != graphSet.end(); ++GI) - { - SchedGraph* graph = (*GI).second; - const vector& bbvec = graph->getBasicBlocks(); - assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks"); - const BasicBlock* bb = bbvec[0]; - - if (SchedDebugLevel >= Sched_PrintSchedTrace) - cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n"; - - SchedPriorities schedPrio(method, graph); // expensive! - SchedulingManager S(target, graph, schedPrio); - - ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph - - ForwardListSchedule(S); // computes schedule in S - - RecordSchedule((*GI).first, S); // records schedule in BB - } + // find the slot to start from, in the current cycle + unsigned int startSlot = 0; + cycles_t curTime = S.getTime(); - if (SchedDebugLevel >= Sched_PrintMachineCode) - { - cout << endl - << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl; - PrintMachineInstructions(method); - } + assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot); - return false; // no reason to fail yet -} - - -// Check minimum gap requirements relative to instructions scheduled in -// previous cycles. -// Note that we do not need to consider `nextEarliestIssueTime' here because -// that is also captured in the earliest start times for each opcode. -// -static inline bool -ViolatesMinimumGap(const SchedulingManager& S, - MachineOpCode opCode, - const cycles_t inCycle) -{ - return (inCycle < S.getEarliestStartTimeForOp(opCode)); -} - - -// Check if the instruction would conflict with instructions already -// chosen for the current cycle -// -static inline bool -ConflictsWithChoices(const SchedulingManager& S, - MachineOpCode opCode) -{ - // Check if the instruction must issue by itself, and some feasible - // choices have already been made for this cycle - if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode)) - return true; + // If only one instruction can be issued, do so. + if (maxIssue == 1) + for (unsigned s=startSlot; s < S.nslots; s++) + if (S.getChoicesForSlot(s).size() > 0) + {// found the one instruction + S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime); + return; + } - // For each class that opCode belongs to, check if there are too many - // instructions of that class. + // Otherwise, choose from the choices for each slot // - const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode); - return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc)); -} - - -// Check if any issue restrictions would prevent the instruction from -// being issued in the current cycle -// -bool -instrIsFeasible(const SchedulingManager& S, - MachineOpCode opCode) -{ - // skip the instruction if it cannot be issued due to issue restrictions - // caused by previously issued instructions - if (ViolatesMinimumGap(S, opCode, S.getTime())) - return false; - - // skip the instruction if it cannot be issued due to issue restrictions - // caused by previously chosen instructions for the current cycle - if (ConflictsWithChoices(S, opCode)) - return false; - - return true; -} - - -//************************* Internal Functions *****************************/ - - -static void -ForwardListSchedule(SchedulingManager& S) -{ - unsigned N; - const SchedGraphNode* node; - - S.schedPrio.initialize(); + InstrGroup* igroup = S.isched.getIGroup(S.getTime()); + assert(igroup != NULL && "Group creation failed?"); - while ((N = S.schedPrio.getNumReady()) > 0) + // Find a slot that has only a single choice, and take it. + // If all slots have 0 or multiple choices, pick the first slot with + // choices and use its last instruction (just to avoid shifting the vector). + unsigned numIssued; + for (numIssued = 0; numIssued < maxIssue; numIssued++) { - // Choose one group of instructions for a cycle. This will - // advance S.getTime() to the first cycle instructions can be issued. - // It may also schedule delay slot instructions in later cycles, - // but those are ignored here because they are outside the graph. - // - unsigned numIssued = ChooseOneGroup(S); - assert(numIssued > 0 && "Deadlock in list scheduling algorithm?"); - - // Notify the priority manager of scheduled instructions and mark - // any successors that may now be ready - // - const InstrGroup* igroup = S.isched.getIGroup(S.getTime()); - for (unsigned int s=0; s < S.nslots; s++) - if ((node = (*igroup)[s]) != NULL) + int chosenSlot = -1, chosenNodeIndex = -1; + for (unsigned s=startSlot; s < S.nslots; s++) + if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1) { - S.schedPrio.issuedReadyNodeAt(S.getTime(), node); - MarkSuccessorsReady(S, node); + chosenSlot = (int) s; + break; } - // Move to the next the next earliest cycle for which - // an instruction can be issued, or the next earliest in which - // one will be ready, or to the next cycle, whichever is latest. - // - S.updateTime(max(S.getTime() + 1, - max(S.getEarliestIssueTime(), - S.schedPrio.getEarliestReadyTime()))); + if (chosenSlot == -1) + for (unsigned s=startSlot; s < S.nslots; s++) + if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0) + { + chosenSlot = (int) s; + break; + } + + if (chosenSlot != -1) + { // Insert the chosen instr in the chosen slot and + // erase it from all slots. + const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin(); + S.scheduleInstr(node, chosenSlot, curTime); + } } + + assert(numIssued > 0 && "Should not happen when maxIssue > 0!"); } @@ -809,46 +680,6 @@ RecordSchedule(const BasicBlock* bb, const SchedulingManager& S) } -static unsigned -ChooseOneGroup(SchedulingManager& S) -{ - assert(S.schedPrio.getNumReady() > 0 - && "Don't get here without ready instructions."); - - DelaySlotInfo* getDelaySlotInfo = NULL; - - // Choose up to `nslots' feasible instructions and their possible slots. - unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo); - - while (numIssued == 0) - { - S.updateTime(S.getTime()+1); - numIssued = FindSlotChoices(S, getDelaySlotInfo); - } - - AssignInstructionsToSlots(S, numIssued); - - if (getDelaySlotInfo != NULL) - getDelaySlotInfo->scheduleDelayedNode(S); - - // Print trace of scheduled instructions before newly ready ones - if (SchedDebugLevel >= Sched_PrintSchedTrace) - { - cout << " Cycle " << S.getTime() << " : Scheduled instructions:\n"; - const InstrGroup* igroup = S.isched.getIGroup(S.getTime()); - for (unsigned int s=0; s < S.nslots; s++) - { - cout << " "; - if ((*igroup)[s] != NULL) - cout << * ((*igroup)[s])->getMachineInstr() << endl; - else - cout << "" << endl; - } - } - - return numIssued; -} - static void MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node) @@ -1025,7 +856,7 @@ FindSlotChoices(SchedulingManager& S, { // Try to assign every other instruction to a lower numbered // slot than delayedNodeSlot. - MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode(); + MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode(); bool noSlotFound = true; unsigned int s; for (s=startSlot; s < delayedNodeSlot; s++) @@ -1093,7 +924,7 @@ FindSlotChoices(SchedulingManager& S, for (unsigned i=0; i < S.getNumChoices() && i < indexForBreakingNode; i++) { - MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode(); + MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode(); // If a higher priority instruction cannot be assigned to // any earlier slots, don't schedule the breaking instruction. @@ -1144,63 +975,95 @@ FindSlotChoices(SchedulingManager& S, } -static void -AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) +static unsigned +ChooseOneGroup(SchedulingManager& S) { - // find the slot to start from, in the current cycle - unsigned int startSlot = 0; - cycles_t curTime = S.getTime(); - - assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot); + assert(S.schedPrio.getNumReady() > 0 + && "Don't get here without ready instructions."); - // If only one instruction can be issued, do so. - if (maxIssue == 1) - for (unsigned s=startSlot; s < S.nslots; s++) - if (S.getChoicesForSlot(s).size() > 0) - {// found the one instruction - S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime); - return; - } + cycles_t firstCycle = S.getTime(); + DelaySlotInfo* getDelaySlotInfo = NULL; - // Otherwise, choose from the choices for each slot - // - InstrGroup* igroup = S.isched.getIGroup(S.getTime()); - assert(igroup != NULL && "Group creation failed?"); + // Choose up to `nslots' feasible instructions and their possible slots. + unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo); - // Find a slot that has only a single choice, and take it. - // If all slots have 0 or multiple choices, pick the first slot with - // choices and use its last instruction (just to avoid shifting the vector). - unsigned numIssued; - for (numIssued = 0; numIssued < maxIssue; numIssued++) + while (numIssued == 0) { - int chosenSlot = -1, chosenNodeIndex = -1; - for (unsigned s=startSlot; s < S.nslots; s++) - if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1) - { - chosenSlot = (int) s; - break; - } - - if (chosenSlot == -1) - for (unsigned s=startSlot; s < S.nslots; s++) - if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0) - { - chosenSlot = (int) s; - break; - } - - if (chosenSlot != -1) - { // Insert the chosen instr in the chosen slot and - // erase it from all slots. - const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin(); - S.scheduleInstr(node, chosenSlot, curTime); - } + S.updateTime(S.getTime()+1); + numIssued = FindSlotChoices(S, getDelaySlotInfo); } - assert(numIssued > 0 && "Should not happen when maxIssue > 0!"); + AssignInstructionsToSlots(S, numIssued); + + if (getDelaySlotInfo != NULL) + numIssued += getDelaySlotInfo->scheduleDelayedNode(S); + + // Print trace of scheduled instructions before newly ready ones + if (SchedDebugLevel >= Sched_PrintSchedTrace) + { + for (cycles_t c = firstCycle; c <= S.getTime(); c++) + { + cout << " Cycle " << c << " : Scheduled instructions:\n"; + const InstrGroup* igroup = S.isched.getIGroup(c); + for (unsigned int s=0; s < S.nslots; s++) + { + cout << " "; + if ((*igroup)[s] != NULL) + cout << * ((*igroup)[s])->getMachineInstr() << endl; + else + cout << "" << endl; + } + } + } + + return numIssued; } +static void +ForwardListSchedule(SchedulingManager& S) +{ + unsigned N; + const SchedGraphNode* node; + + S.schedPrio.initialize(); + + while ((N = S.schedPrio.getNumReady()) > 0) + { + cycles_t nextCycle = S.getTime(); + + // Choose one group of instructions for a cycle, plus any delay slot + // instructions (which may overflow into successive cycles). + // This will advance S.getTime() to the last cycle in which + // instructions are actually issued. + // + unsigned numIssued = ChooseOneGroup(S); + assert(numIssued > 0 && "Deadlock in list scheduling algorithm?"); + + // Notify the priority manager of scheduled instructions and mark + // any successors that may now be ready + // + for (cycles_t c = nextCycle; c <= S.getTime(); c++) + { + const InstrGroup* igroup = S.isched.getIGroup(c); + for (unsigned int s=0; s < S.nslots; s++) + if ((node = (*igroup)[s]) != NULL) + { + S.schedPrio.issuedReadyNodeAt(S.getTime(), node); + MarkSuccessorsReady(S, node); + } + } + + // Move to the next the next earliest cycle for which + // an instruction can be issued, or the next earliest in which + // one will be ready, or to the next cycle, whichever is latest. + // + S.updateTime(max(S.getTime() + 1, + max(S.getEarliestIssueTime(), + S.schedPrio.getEarliestReadyTime()))); + } +} + //--------------------------------------------------------------------- // Code for filling delay slots for delayed terminator instructions @@ -1211,107 +1074,7 @@ AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) // when we cannot find single-cycle instructions that can be reordered. //---------------------------------------------------------------------- -static void -ChooseInstructionsForDelaySlots(SchedulingManager& S, - const BasicBlock* bb, - SchedGraph* graph) -{ - // Look for instructions that can be used for delay slots. - // Remove them from the graph, and mark them to be used for delay slots. - const MachineInstrInfo& mii = S.getInstrInfo(); - const TerminatorInst* term = bb->getTerminator(); - MachineCodeForVMInstr& termMvec = term->getMachineInstrVec(); - - // Find the first branch instr in the sequence of machine instrs for term - // - unsigned first = 0; - while (! mii.isBranch(termMvec[first]->getOpCode())) - ++first; - assert(first < termMvec.size() && - "No branch instructions for BR? Ok, but weird! Delete assertion."); - if (first == termMvec.size()) - return; - - SchedGraphNode* brNode = graph->getGraphNodeForInstr(termMvec[first]); - assert(! mii.isCall(brNode->getMachineInstr()->getOpCode()) && "Call used as terminator?"); - - unsigned ndelays = mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode()); - if (ndelays == 0) - return; - - // Use vectors to remember the nodes chosen for delay slots, and the - // NOPs that will be unused. We cannot remove them from the graph while - // walking through the preds and succs of the brNode here, so we - // remember the nodes in the vectors and remove them later. - // We use separate vectors for the single-cycle and multi-cycle nodes, - // so that we can give preference to single-cycle nodes. - // - vector sdelayNodeVec; - vector mdelayNodeVec; - vector nopNodeVec; - unsigned numUseful = 0; - - sdelayNodeVec.reserve(ndelays); - - for (sg_pred_iterator P = pred_begin(brNode); - P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P) - if (! (*P)->isDummyNode() && - ! mii.isNop((*P)->getMachineInstr()->getOpCode()) && - NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true)) - { - ++numUseful; - if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1) - mdelayNodeVec.push_back(*P); - else - sdelayNodeVec.push_back(*P); - } - - // If not enough single-cycle instructions were found, select the - // lowest-latency multi-cycle instructions and use them. - // Note that this is the most efficient code when only 1 (or even 2) - // values need to be selected. - // - while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) - { - unsigned latency; - unsigned minLatency = mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode()); - unsigned minIndex = 0; - for (unsigned i=1; i < mdelayNodeVec.size(); i++) - if (minLatency >= - (latency = mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode()))) - { - minLatency = latency; - minIndex = i; - } - sdelayNodeVec.push_back(mdelayNodeVec[minIndex]); - if (sdelayNodeVec.size() < ndelays) // avoid the last erase! - mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex); - } - - // Now, remove the NOPs currently in delay slots from the graph. - // If not enough useful instructions were found, use the NOPs to - // fill delay slots, otherwise, just discard them. - for (sg_succ_iterator I=succ_begin(brNode); I != succ_end(brNode); ++I) - if (! (*I)->isDummyNode() - && mii.isNop((*I)->getMachineInstr()->getOpCode())) - { - if (sdelayNodeVec.size() < ndelays) - sdelayNodeVec.push_back(*I); - else - nopNodeVec.push_back(*I); - } - - // Mark the nodes chosen for delay slots. This removes them from the graph. - for (unsigned i=0; i < sdelayNodeVec.size(); i++) - MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], brNode, true); - - // And remove the unused NOPs from the graph. - for (unsigned i=0; i < nopNodeVec.size(); i++) - graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true); -} - - -bool +static bool NodeCanFillDelaySlot(const SchedulingManager& S, const SchedGraphNode* node, const SchedGraphNode* brNode, @@ -1367,7 +1130,7 @@ NodeCanFillDelaySlot(const SchedulingManager& S, } -void +static void MarkNodeForDelaySlot(SchedulingManager& S, SchedGraph* graph, SchedGraphNode* node, @@ -1391,10 +1154,173 @@ MarkNodeForDelaySlot(SchedulingManager& S, } +void +FindUsefulInstructionsForDelaySlots(SchedulingManager& S, + SchedGraphNode* brNode, + vector& sdelayNodeVec) +{ + const MachineInstrInfo& mii = S.getInstrInfo(); + unsigned ndelays = + mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode()); + + if (ndelays == 0) + return; + + sdelayNodeVec.reserve(ndelays); + + // Use a separate vector to hold the feasible multi-cycle nodes. + // These will be used if not enough single-cycle nodes are found. + // + vector mdelayNodeVec; + + for (sg_pred_iterator P = pred_begin(brNode); + P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P) + if (! (*P)->isDummyNode() && + ! mii.isNop((*P)->getMachineInstr()->getOpCode()) && + NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true)) + { + if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1) + mdelayNodeVec.push_back(*P); + else + sdelayNodeVec.push_back(*P); + } + + // If not enough single-cycle instructions were found, select the + // lowest-latency multi-cycle instructions and use them. + // Note that this is the most efficient code when only 1 (or even 2) + // values need to be selected. + // + while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) + { + unsigned lmin = + mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode()); + unsigned minIndex = 0; + for (unsigned i=1; i < mdelayNodeVec.size(); i++) + { + unsigned li = + mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode()); + if (lmin >= li) + { + lmin = li; + minIndex = i; + } + } + sdelayNodeVec.push_back(mdelayNodeVec[minIndex]); + if (sdelayNodeVec.size() < ndelays) // avoid the last erase! + mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex); + } +} + + +// Remove the NOPs currently in delay slots from the graph. +// Mark instructions specified in sdelayNodeVec to replace them. +// If not enough useful instructions were found, mark the NOPs to be used +// for filling delay slots, otherwise, otherwise just discard them. +// +void +ReplaceNopsWithUsefulInstr(SchedulingManager& S, + SchedGraphNode* node, + vector sdelayNodeVec, + SchedGraph* graph) +{ + vector nopNodeVec; + const MachineInstrInfo& mii = S.getInstrInfo(); + unsigned ndelays= mii.getNumDelaySlots(node->getMachineInstr()->getOpCode()); + assert(ndelays > 0 && "Unnecessary call to replace NOPs"); + + // Remove the NOPs currently in delay slots from the graph. + // If not enough useful instructions were found, use the NOPs to + // fill delay slots, otherwise, just discard them. + for (sg_succ_iterator I=succ_begin(node); I != succ_end(node); ++I) + if (! (*I)->isDummyNode() + && mii.isNop((*I)->getMachineInstr()->getOpCode())) + { + if (sdelayNodeVec.size() < ndelays) + sdelayNodeVec.push_back(*I); + else + nopNodeVec.push_back(*I); + } + assert(sdelayNodeVec.size() == ndelays); + + // Mark the nodes chosen for delay slots. This removes them from the graph. + for (unsigned i=0; i < sdelayNodeVec.size(); i++) + MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true); + + // And remove the unused NOPs from the graph. + for (unsigned i=0; i < nopNodeVec.size(); i++) + graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true); +} + + +// For all delayed instructions, choose instructions to put in the delay +// slots and pull those out of the graph. Mark them for the delay slots +// in the DelaySlotInfo object for that graph node. If no useful work +// is found for a delay slot, use the NOP that is currently in that slot. +// +// We try to fill the delay slots with useful work for all instructions +// except CALLs. For CALLs, it is nearly always possible to use one of the +// call sequence instrs and putting anything else in the delay slot could be +// suboptimal. +// +static void +ChooseInstructionsForDelaySlots(SchedulingManager& S, + const BasicBlock* bb, + SchedGraph* graph) +{ + const MachineInstrInfo& mii = S.getInstrInfo(); + const TerminatorInst* termInstr = bb->getTerminator(); + MachineCodeForVMInstr& termMvec = termInstr->getMachineInstrVec(); + vector delayNodeVec; + const MachineInstr* brInstr; + + assert(termInstr->getOpcode() != Instruction::Call + && "Call used as terminator?"); + + // To find instructions that need delay slots without searching the entire + // machine code, we assume the only delayed instructions are CALLs or + // instructions generated for the terminator inst. + // Find the first branch instr in the sequence of machine instrs for term + // + unsigned first = 0; + while (first < termMvec.size() && + ! mii.isBranch(termMvec[first]->getOpCode())) + { + ++first; + } + assert(first < termMvec.size() && + "No branch instructions for BR? Ok, but weird! Delete assertion."); + + brInstr = (first < termMvec.size())? termMvec[first] : NULL; + + // Compute a vector of the nodes chosen for delay slots and then + // mark delay slots to replace NOPs with these useful instructions. + // + if (brInstr != NULL) + { + SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr); + FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec); + ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph); + } + + // Also mark delay slots for other delayed instructions to hold NOPs. + // Simply passing in an empty delayNodeVec will have this effect. + // + delayNodeVec.clear(); + const MachineCodeForBasicBlock& bbMvec = bb->getMachineInstrVec(); + for (unsigned i=0; i < bbMvec.size(); i++) + if (bbMvec[i] != brInstr && + mii.getNumDelaySlots(bbMvec[i]->getOpCode()) > 0) + { + SchedGraphNode* node = graph->getGraphNodeForInstr(bbMvec[i]); + ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph); + } +} + + // // Schedule the delayed branch and its delay slots // -void +unsigned DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S) { assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch"); @@ -1461,5 +1387,130 @@ DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S) S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime); break; } + + return 1 + ndelays; +} + + +// Check if the instruction would conflict with instructions already +// chosen for the current cycle +// +static inline bool +ConflictsWithChoices(const SchedulingManager& S, + MachineOpCode opCode) +{ + // Check if the instruction must issue by itself, and some feasible + // choices have already been made for this cycle + if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode)) + return true; + + // For each class that opCode belongs to, check if there are too many + // instructions of that class. + // + const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode); + return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc)); } + +//************************* External Functions *****************************/ + + +//--------------------------------------------------------------------------- +// Function: ViolatesMinimumGap +// +// Purpose: +// Check minimum gap requirements relative to instructions scheduled in +// previous cycles. +// Note that we do not need to consider `nextEarliestIssueTime' here because +// that is also captured in the earliest start times for each opcode. +//--------------------------------------------------------------------------- + +static inline bool +ViolatesMinimumGap(const SchedulingManager& S, + MachineOpCode opCode, + const cycles_t inCycle) +{ + return (inCycle < S.getEarliestStartTimeForOp(opCode)); +} + + +//--------------------------------------------------------------------------- +// Function: instrIsFeasible +// +// Purpose: +// Check if any issue restrictions would prevent the instruction from +// being issued in the current cycle +//--------------------------------------------------------------------------- + +bool +instrIsFeasible(const SchedulingManager& S, + MachineOpCode opCode) +{ + // skip the instruction if it cannot be issued due to issue restrictions + // caused by previously issued instructions + if (ViolatesMinimumGap(S, opCode, S.getTime())) + return false; + + // skip the instruction if it cannot be issued due to issue restrictions + // caused by previously chosen instructions for the current cycle + if (ConflictsWithChoices(S, opCode)) + return false; + + return true; +} + +//--------------------------------------------------------------------------- +// Function: ScheduleInstructionsWithSSA +// +// Purpose: +// Entry point for instruction scheduling on SSA form. +// Schedules the machine instructions generated by instruction selection. +// Assumes that register allocation has not been done, i.e., operands +// are still in SSA form. +//--------------------------------------------------------------------------- + +bool +ScheduleInstructionsWithSSA(Method* method, + const TargetMachine &target) +{ + SchedGraphSet graphSet(method, target); + + if (SchedDebugLevel >= Sched_PrintSchedGraphs) + { + cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING" + << endl; + graphSet.dump(); + } + + for (SchedGraphSet::const_iterator GI=graphSet.begin(); + GI != graphSet.end(); ++GI) + { + SchedGraph* graph = (*GI).second; + const vector& bbvec = graph->getBasicBlocks(); + assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks"); + const BasicBlock* bb = bbvec[0]; + + if (SchedDebugLevel >= Sched_PrintSchedTrace) + cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n"; + + SchedPriorities schedPrio(method, graph); // expensive! + SchedulingManager S(target, graph, schedPrio); + + ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph + + ForwardListSchedule(S); // computes schedule in S + + RecordSchedule((*GI).first, S); // records schedule in BB + } + + if (SchedDebugLevel >= Sched_PrintMachineCode) + { + cout << endl + << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl; + PrintMachineInstructions(method); + } + + return false; // no reason to fail yet +} + + diff --git a/lib/Target/SparcV9/InstrSched/InstrScheduling.cpp b/lib/Target/SparcV9/InstrSched/InstrScheduling.cpp index 34802c9e5bb..0b194207ca4 100644 --- a/lib/Target/SparcV9/InstrSched/InstrScheduling.cpp +++ b/lib/Target/SparcV9/InstrSched/InstrScheduling.cpp @@ -9,16 +9,26 @@ // 7/23/01 - Vikram Adve - Created //**************************************************************************/ + +//************************* User Include Files *****************************/ + #include "llvm/CodeGen/InstrScheduling.h" #include "SchedPriorities.h" #include "llvm/Analysis/LiveVar/BBLiveVar.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Instruction.h" + + +//************************ System Include Files *****************************/ + #include #include #include + +//************************* External Data Types *****************************/ + cl::Enum SchedDebugLevel("dsched", cl::NoFlags, "enable instruction scheduling debugging information", clEnumValN(Sched_NoDebugInfo, "n", "disable debug output"), @@ -27,55 +37,12 @@ cl::Enum SchedDebugLevel("dsched", cl::NoFlags, clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 0); +//************************* Internal Data Types *****************************/ + class InstrSchedule; class SchedulingManager; class DelaySlotInfo; -static void ForwardListSchedule (SchedulingManager& S); - -static void RecordSchedule (const BasicBlock* bb, - const SchedulingManager& S); - -static unsigned ChooseOneGroup (SchedulingManager& S); - -static void MarkSuccessorsReady (SchedulingManager& S, - const SchedGraphNode* node); - -static unsigned FindSlotChoices (SchedulingManager& S, - DelaySlotInfo*& getDelaySlotInfo); - -static void AssignInstructionsToSlots(class SchedulingManager& S, - unsigned maxIssue); - -static void ScheduleInstr (class SchedulingManager& S, - const SchedGraphNode* node, - unsigned int slotNum, - cycles_t curTime); - -static bool ViolatesMinimumGap (const SchedulingManager& S, - MachineOpCode opCode, - const cycles_t inCycle); - -static bool ConflictsWithChoices (const SchedulingManager& S, - MachineOpCode opCode); - -static void ChooseInstructionsForDelaySlots(SchedulingManager& S, - const BasicBlock* bb, - SchedGraph* graph); - -static bool NodeCanFillDelaySlot (const SchedulingManager& S, - const SchedGraphNode* node, - const SchedGraphNode* brNode, - bool nodeIsPredecessor); - -static void MarkNodeForDelaySlot (SchedulingManager& S, - SchedGraph* graph, - SchedGraphNode* node, - const SchedGraphNode* brNode, - bool nodeIsPredecessor); - -//************************* Internal Data Types *****************************/ - //---------------------------------------------------------------------- // class InstrGroup: @@ -369,7 +336,7 @@ public: delayedNodeSlotNum = slotNum; } - void scheduleDelayedNode (SchedulingManager& S); + unsigned scheduleDelayedNode (SchedulingManager& S); }; @@ -539,7 +506,7 @@ public: if (createIfMissing) { dinfo = new DelaySlotInfo(bn, - getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode())); + getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode())); delaySlotInfoForBranches[bn] = dinfo; } else @@ -608,159 +575,63 @@ SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node, } } -//************************* External Functions *****************************/ - +//************************* Internal Functions *****************************/ -//--------------------------------------------------------------------------- -// Function: ScheduleInstructionsWithSSA -// -// Purpose: -// Entry point for instruction scheduling on SSA form. -// Schedules the machine instructions generated by instruction selection. -// Assumes that register allocation has not been done, i.e., operands -// are still in SSA form. -//--------------------------------------------------------------------------- -bool -ScheduleInstructionsWithSSA(Method* method, - const TargetMachine &target) +static void +AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) { - SchedGraphSet graphSet(method, target); - - if (SchedDebugLevel >= Sched_PrintSchedGraphs) - { - cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING" - << endl; - graphSet.dump(); - } - - for (SchedGraphSet::const_iterator GI=graphSet.begin(); - GI != graphSet.end(); ++GI) - { - SchedGraph* graph = (*GI).second; - const vector& bbvec = graph->getBasicBlocks(); - assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks"); - const BasicBlock* bb = bbvec[0]; - - if (SchedDebugLevel >= Sched_PrintSchedTrace) - cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n"; - - SchedPriorities schedPrio(method, graph); // expensive! - SchedulingManager S(target, graph, schedPrio); - - ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph - - ForwardListSchedule(S); // computes schedule in S - - RecordSchedule((*GI).first, S); // records schedule in BB - } + // find the slot to start from, in the current cycle + unsigned int startSlot = 0; + cycles_t curTime = S.getTime(); - if (SchedDebugLevel >= Sched_PrintMachineCode) - { - cout << endl - << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl; - PrintMachineInstructions(method); - } + assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot); - return false; // no reason to fail yet -} - - -// Check minimum gap requirements relative to instructions scheduled in -// previous cycles. -// Note that we do not need to consider `nextEarliestIssueTime' here because -// that is also captured in the earliest start times for each opcode. -// -static inline bool -ViolatesMinimumGap(const SchedulingManager& S, - MachineOpCode opCode, - const cycles_t inCycle) -{ - return (inCycle < S.getEarliestStartTimeForOp(opCode)); -} - - -// Check if the instruction would conflict with instructions already -// chosen for the current cycle -// -static inline bool -ConflictsWithChoices(const SchedulingManager& S, - MachineOpCode opCode) -{ - // Check if the instruction must issue by itself, and some feasible - // choices have already been made for this cycle - if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode)) - return true; + // If only one instruction can be issued, do so. + if (maxIssue == 1) + for (unsigned s=startSlot; s < S.nslots; s++) + if (S.getChoicesForSlot(s).size() > 0) + {// found the one instruction + S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime); + return; + } - // For each class that opCode belongs to, check if there are too many - // instructions of that class. + // Otherwise, choose from the choices for each slot // - const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode); - return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc)); -} - - -// Check if any issue restrictions would prevent the instruction from -// being issued in the current cycle -// -bool -instrIsFeasible(const SchedulingManager& S, - MachineOpCode opCode) -{ - // skip the instruction if it cannot be issued due to issue restrictions - // caused by previously issued instructions - if (ViolatesMinimumGap(S, opCode, S.getTime())) - return false; - - // skip the instruction if it cannot be issued due to issue restrictions - // caused by previously chosen instructions for the current cycle - if (ConflictsWithChoices(S, opCode)) - return false; - - return true; -} - - -//************************* Internal Functions *****************************/ - - -static void -ForwardListSchedule(SchedulingManager& S) -{ - unsigned N; - const SchedGraphNode* node; - - S.schedPrio.initialize(); + InstrGroup* igroup = S.isched.getIGroup(S.getTime()); + assert(igroup != NULL && "Group creation failed?"); - while ((N = S.schedPrio.getNumReady()) > 0) + // Find a slot that has only a single choice, and take it. + // If all slots have 0 or multiple choices, pick the first slot with + // choices and use its last instruction (just to avoid shifting the vector). + unsigned numIssued; + for (numIssued = 0; numIssued < maxIssue; numIssued++) { - // Choose one group of instructions for a cycle. This will - // advance S.getTime() to the first cycle instructions can be issued. - // It may also schedule delay slot instructions in later cycles, - // but those are ignored here because they are outside the graph. - // - unsigned numIssued = ChooseOneGroup(S); - assert(numIssued > 0 && "Deadlock in list scheduling algorithm?"); - - // Notify the priority manager of scheduled instructions and mark - // any successors that may now be ready - // - const InstrGroup* igroup = S.isched.getIGroup(S.getTime()); - for (unsigned int s=0; s < S.nslots; s++) - if ((node = (*igroup)[s]) != NULL) + int chosenSlot = -1, chosenNodeIndex = -1; + for (unsigned s=startSlot; s < S.nslots; s++) + if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1) { - S.schedPrio.issuedReadyNodeAt(S.getTime(), node); - MarkSuccessorsReady(S, node); + chosenSlot = (int) s; + break; } - // Move to the next the next earliest cycle for which - // an instruction can be issued, or the next earliest in which - // one will be ready, or to the next cycle, whichever is latest. - // - S.updateTime(max(S.getTime() + 1, - max(S.getEarliestIssueTime(), - S.schedPrio.getEarliestReadyTime()))); + if (chosenSlot == -1) + for (unsigned s=startSlot; s < S.nslots; s++) + if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0) + { + chosenSlot = (int) s; + break; + } + + if (chosenSlot != -1) + { // Insert the chosen instr in the chosen slot and + // erase it from all slots. + const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin(); + S.scheduleInstr(node, chosenSlot, curTime); + } } + + assert(numIssued > 0 && "Should not happen when maxIssue > 0!"); } @@ -809,46 +680,6 @@ RecordSchedule(const BasicBlock* bb, const SchedulingManager& S) } -static unsigned -ChooseOneGroup(SchedulingManager& S) -{ - assert(S.schedPrio.getNumReady() > 0 - && "Don't get here without ready instructions."); - - DelaySlotInfo* getDelaySlotInfo = NULL; - - // Choose up to `nslots' feasible instructions and their possible slots. - unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo); - - while (numIssued == 0) - { - S.updateTime(S.getTime()+1); - numIssued = FindSlotChoices(S, getDelaySlotInfo); - } - - AssignInstructionsToSlots(S, numIssued); - - if (getDelaySlotInfo != NULL) - getDelaySlotInfo->scheduleDelayedNode(S); - - // Print trace of scheduled instructions before newly ready ones - if (SchedDebugLevel >= Sched_PrintSchedTrace) - { - cout << " Cycle " << S.getTime() << " : Scheduled instructions:\n"; - const InstrGroup* igroup = S.isched.getIGroup(S.getTime()); - for (unsigned int s=0; s < S.nslots; s++) - { - cout << " "; - if ((*igroup)[s] != NULL) - cout << * ((*igroup)[s])->getMachineInstr() << endl; - else - cout << "" << endl; - } - } - - return numIssued; -} - static void MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node) @@ -1025,7 +856,7 @@ FindSlotChoices(SchedulingManager& S, { // Try to assign every other instruction to a lower numbered // slot than delayedNodeSlot. - MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode(); + MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode(); bool noSlotFound = true; unsigned int s; for (s=startSlot; s < delayedNodeSlot; s++) @@ -1093,7 +924,7 @@ FindSlotChoices(SchedulingManager& S, for (unsigned i=0; i < S.getNumChoices() && i < indexForBreakingNode; i++) { - MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode(); + MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode(); // If a higher priority instruction cannot be assigned to // any earlier slots, don't schedule the breaking instruction. @@ -1144,63 +975,95 @@ FindSlotChoices(SchedulingManager& S, } -static void -AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) +static unsigned +ChooseOneGroup(SchedulingManager& S) { - // find the slot to start from, in the current cycle - unsigned int startSlot = 0; - cycles_t curTime = S.getTime(); - - assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot); + assert(S.schedPrio.getNumReady() > 0 + && "Don't get here without ready instructions."); - // If only one instruction can be issued, do so. - if (maxIssue == 1) - for (unsigned s=startSlot; s < S.nslots; s++) - if (S.getChoicesForSlot(s).size() > 0) - {// found the one instruction - S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime); - return; - } + cycles_t firstCycle = S.getTime(); + DelaySlotInfo* getDelaySlotInfo = NULL; - // Otherwise, choose from the choices for each slot - // - InstrGroup* igroup = S.isched.getIGroup(S.getTime()); - assert(igroup != NULL && "Group creation failed?"); + // Choose up to `nslots' feasible instructions and their possible slots. + unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo); - // Find a slot that has only a single choice, and take it. - // If all slots have 0 or multiple choices, pick the first slot with - // choices and use its last instruction (just to avoid shifting the vector). - unsigned numIssued; - for (numIssued = 0; numIssued < maxIssue; numIssued++) + while (numIssued == 0) { - int chosenSlot = -1, chosenNodeIndex = -1; - for (unsigned s=startSlot; s < S.nslots; s++) - if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1) - { - chosenSlot = (int) s; - break; - } - - if (chosenSlot == -1) - for (unsigned s=startSlot; s < S.nslots; s++) - if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0) - { - chosenSlot = (int) s; - break; - } - - if (chosenSlot != -1) - { // Insert the chosen instr in the chosen slot and - // erase it from all slots. - const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin(); - S.scheduleInstr(node, chosenSlot, curTime); - } + S.updateTime(S.getTime()+1); + numIssued = FindSlotChoices(S, getDelaySlotInfo); } - assert(numIssued > 0 && "Should not happen when maxIssue > 0!"); + AssignInstructionsToSlots(S, numIssued); + + if (getDelaySlotInfo != NULL) + numIssued += getDelaySlotInfo->scheduleDelayedNode(S); + + // Print trace of scheduled instructions before newly ready ones + if (SchedDebugLevel >= Sched_PrintSchedTrace) + { + for (cycles_t c = firstCycle; c <= S.getTime(); c++) + { + cout << " Cycle " << c << " : Scheduled instructions:\n"; + const InstrGroup* igroup = S.isched.getIGroup(c); + for (unsigned int s=0; s < S.nslots; s++) + { + cout << " "; + if ((*igroup)[s] != NULL) + cout << * ((*igroup)[s])->getMachineInstr() << endl; + else + cout << "" << endl; + } + } + } + + return numIssued; } +static void +ForwardListSchedule(SchedulingManager& S) +{ + unsigned N; + const SchedGraphNode* node; + + S.schedPrio.initialize(); + + while ((N = S.schedPrio.getNumReady()) > 0) + { + cycles_t nextCycle = S.getTime(); + + // Choose one group of instructions for a cycle, plus any delay slot + // instructions (which may overflow into successive cycles). + // This will advance S.getTime() to the last cycle in which + // instructions are actually issued. + // + unsigned numIssued = ChooseOneGroup(S); + assert(numIssued > 0 && "Deadlock in list scheduling algorithm?"); + + // Notify the priority manager of scheduled instructions and mark + // any successors that may now be ready + // + for (cycles_t c = nextCycle; c <= S.getTime(); c++) + { + const InstrGroup* igroup = S.isched.getIGroup(c); + for (unsigned int s=0; s < S.nslots; s++) + if ((node = (*igroup)[s]) != NULL) + { + S.schedPrio.issuedReadyNodeAt(S.getTime(), node); + MarkSuccessorsReady(S, node); + } + } + + // Move to the next the next earliest cycle for which + // an instruction can be issued, or the next earliest in which + // one will be ready, or to the next cycle, whichever is latest. + // + S.updateTime(max(S.getTime() + 1, + max(S.getEarliestIssueTime(), + S.schedPrio.getEarliestReadyTime()))); + } +} + //--------------------------------------------------------------------- // Code for filling delay slots for delayed terminator instructions @@ -1211,107 +1074,7 @@ AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue) // when we cannot find single-cycle instructions that can be reordered. //---------------------------------------------------------------------- -static void -ChooseInstructionsForDelaySlots(SchedulingManager& S, - const BasicBlock* bb, - SchedGraph* graph) -{ - // Look for instructions that can be used for delay slots. - // Remove them from the graph, and mark them to be used for delay slots. - const MachineInstrInfo& mii = S.getInstrInfo(); - const TerminatorInst* term = bb->getTerminator(); - MachineCodeForVMInstr& termMvec = term->getMachineInstrVec(); - - // Find the first branch instr in the sequence of machine instrs for term - // - unsigned first = 0; - while (! mii.isBranch(termMvec[first]->getOpCode())) - ++first; - assert(first < termMvec.size() && - "No branch instructions for BR? Ok, but weird! Delete assertion."); - if (first == termMvec.size()) - return; - - SchedGraphNode* brNode = graph->getGraphNodeForInstr(termMvec[first]); - assert(! mii.isCall(brNode->getMachineInstr()->getOpCode()) && "Call used as terminator?"); - - unsigned ndelays = mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode()); - if (ndelays == 0) - return; - - // Use vectors to remember the nodes chosen for delay slots, and the - // NOPs that will be unused. We cannot remove them from the graph while - // walking through the preds and succs of the brNode here, so we - // remember the nodes in the vectors and remove them later. - // We use separate vectors for the single-cycle and multi-cycle nodes, - // so that we can give preference to single-cycle nodes. - // - vector sdelayNodeVec; - vector mdelayNodeVec; - vector nopNodeVec; - unsigned numUseful = 0; - - sdelayNodeVec.reserve(ndelays); - - for (sg_pred_iterator P = pred_begin(brNode); - P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P) - if (! (*P)->isDummyNode() && - ! mii.isNop((*P)->getMachineInstr()->getOpCode()) && - NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true)) - { - ++numUseful; - if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1) - mdelayNodeVec.push_back(*P); - else - sdelayNodeVec.push_back(*P); - } - - // If not enough single-cycle instructions were found, select the - // lowest-latency multi-cycle instructions and use them. - // Note that this is the most efficient code when only 1 (or even 2) - // values need to be selected. - // - while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) - { - unsigned latency; - unsigned minLatency = mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode()); - unsigned minIndex = 0; - for (unsigned i=1; i < mdelayNodeVec.size(); i++) - if (minLatency >= - (latency = mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode()))) - { - minLatency = latency; - minIndex = i; - } - sdelayNodeVec.push_back(mdelayNodeVec[minIndex]); - if (sdelayNodeVec.size() < ndelays) // avoid the last erase! - mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex); - } - - // Now, remove the NOPs currently in delay slots from the graph. - // If not enough useful instructions were found, use the NOPs to - // fill delay slots, otherwise, just discard them. - for (sg_succ_iterator I=succ_begin(brNode); I != succ_end(brNode); ++I) - if (! (*I)->isDummyNode() - && mii.isNop((*I)->getMachineInstr()->getOpCode())) - { - if (sdelayNodeVec.size() < ndelays) - sdelayNodeVec.push_back(*I); - else - nopNodeVec.push_back(*I); - } - - // Mark the nodes chosen for delay slots. This removes them from the graph. - for (unsigned i=0; i < sdelayNodeVec.size(); i++) - MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], brNode, true); - - // And remove the unused NOPs from the graph. - for (unsigned i=0; i < nopNodeVec.size(); i++) - graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true); -} - - -bool +static bool NodeCanFillDelaySlot(const SchedulingManager& S, const SchedGraphNode* node, const SchedGraphNode* brNode, @@ -1367,7 +1130,7 @@ NodeCanFillDelaySlot(const SchedulingManager& S, } -void +static void MarkNodeForDelaySlot(SchedulingManager& S, SchedGraph* graph, SchedGraphNode* node, @@ -1391,10 +1154,173 @@ MarkNodeForDelaySlot(SchedulingManager& S, } +void +FindUsefulInstructionsForDelaySlots(SchedulingManager& S, + SchedGraphNode* brNode, + vector& sdelayNodeVec) +{ + const MachineInstrInfo& mii = S.getInstrInfo(); + unsigned ndelays = + mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode()); + + if (ndelays == 0) + return; + + sdelayNodeVec.reserve(ndelays); + + // Use a separate vector to hold the feasible multi-cycle nodes. + // These will be used if not enough single-cycle nodes are found. + // + vector mdelayNodeVec; + + for (sg_pred_iterator P = pred_begin(brNode); + P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P) + if (! (*P)->isDummyNode() && + ! mii.isNop((*P)->getMachineInstr()->getOpCode()) && + NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true)) + { + if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1) + mdelayNodeVec.push_back(*P); + else + sdelayNodeVec.push_back(*P); + } + + // If not enough single-cycle instructions were found, select the + // lowest-latency multi-cycle instructions and use them. + // Note that this is the most efficient code when only 1 (or even 2) + // values need to be selected. + // + while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) + { + unsigned lmin = + mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode()); + unsigned minIndex = 0; + for (unsigned i=1; i < mdelayNodeVec.size(); i++) + { + unsigned li = + mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode()); + if (lmin >= li) + { + lmin = li; + minIndex = i; + } + } + sdelayNodeVec.push_back(mdelayNodeVec[minIndex]); + if (sdelayNodeVec.size() < ndelays) // avoid the last erase! + mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex); + } +} + + +// Remove the NOPs currently in delay slots from the graph. +// Mark instructions specified in sdelayNodeVec to replace them. +// If not enough useful instructions were found, mark the NOPs to be used +// for filling delay slots, otherwise, otherwise just discard them. +// +void +ReplaceNopsWithUsefulInstr(SchedulingManager& S, + SchedGraphNode* node, + vector sdelayNodeVec, + SchedGraph* graph) +{ + vector nopNodeVec; + const MachineInstrInfo& mii = S.getInstrInfo(); + unsigned ndelays= mii.getNumDelaySlots(node->getMachineInstr()->getOpCode()); + assert(ndelays > 0 && "Unnecessary call to replace NOPs"); + + // Remove the NOPs currently in delay slots from the graph. + // If not enough useful instructions were found, use the NOPs to + // fill delay slots, otherwise, just discard them. + for (sg_succ_iterator I=succ_begin(node); I != succ_end(node); ++I) + if (! (*I)->isDummyNode() + && mii.isNop((*I)->getMachineInstr()->getOpCode())) + { + if (sdelayNodeVec.size() < ndelays) + sdelayNodeVec.push_back(*I); + else + nopNodeVec.push_back(*I); + } + assert(sdelayNodeVec.size() == ndelays); + + // Mark the nodes chosen for delay slots. This removes them from the graph. + for (unsigned i=0; i < sdelayNodeVec.size(); i++) + MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true); + + // And remove the unused NOPs from the graph. + for (unsigned i=0; i < nopNodeVec.size(); i++) + graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true); +} + + +// For all delayed instructions, choose instructions to put in the delay +// slots and pull those out of the graph. Mark them for the delay slots +// in the DelaySlotInfo object for that graph node. If no useful work +// is found for a delay slot, use the NOP that is currently in that slot. +// +// We try to fill the delay slots with useful work for all instructions +// except CALLs. For CALLs, it is nearly always possible to use one of the +// call sequence instrs and putting anything else in the delay slot could be +// suboptimal. +// +static void +ChooseInstructionsForDelaySlots(SchedulingManager& S, + const BasicBlock* bb, + SchedGraph* graph) +{ + const MachineInstrInfo& mii = S.getInstrInfo(); + const TerminatorInst* termInstr = bb->getTerminator(); + MachineCodeForVMInstr& termMvec = termInstr->getMachineInstrVec(); + vector delayNodeVec; + const MachineInstr* brInstr; + + assert(termInstr->getOpcode() != Instruction::Call + && "Call used as terminator?"); + + // To find instructions that need delay slots without searching the entire + // machine code, we assume the only delayed instructions are CALLs or + // instructions generated for the terminator inst. + // Find the first branch instr in the sequence of machine instrs for term + // + unsigned first = 0; + while (first < termMvec.size() && + ! mii.isBranch(termMvec[first]->getOpCode())) + { + ++first; + } + assert(first < termMvec.size() && + "No branch instructions for BR? Ok, but weird! Delete assertion."); + + brInstr = (first < termMvec.size())? termMvec[first] : NULL; + + // Compute a vector of the nodes chosen for delay slots and then + // mark delay slots to replace NOPs with these useful instructions. + // + if (brInstr != NULL) + { + SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr); + FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec); + ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph); + } + + // Also mark delay slots for other delayed instructions to hold NOPs. + // Simply passing in an empty delayNodeVec will have this effect. + // + delayNodeVec.clear(); + const MachineCodeForBasicBlock& bbMvec = bb->getMachineInstrVec(); + for (unsigned i=0; i < bbMvec.size(); i++) + if (bbMvec[i] != brInstr && + mii.getNumDelaySlots(bbMvec[i]->getOpCode()) > 0) + { + SchedGraphNode* node = graph->getGraphNodeForInstr(bbMvec[i]); + ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph); + } +} + + // // Schedule the delayed branch and its delay slots // -void +unsigned DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S) { assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch"); @@ -1461,5 +1387,130 @@ DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S) S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime); break; } + + return 1 + ndelays; +} + + +// Check if the instruction would conflict with instructions already +// chosen for the current cycle +// +static inline bool +ConflictsWithChoices(const SchedulingManager& S, + MachineOpCode opCode) +{ + // Check if the instruction must issue by itself, and some feasible + // choices have already been made for this cycle + if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode)) + return true; + + // For each class that opCode belongs to, check if there are too many + // instructions of that class. + // + const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode); + return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc)); } + +//************************* External Functions *****************************/ + + +//--------------------------------------------------------------------------- +// Function: ViolatesMinimumGap +// +// Purpose: +// Check minimum gap requirements relative to instructions scheduled in +// previous cycles. +// Note that we do not need to consider `nextEarliestIssueTime' here because +// that is also captured in the earliest start times for each opcode. +//--------------------------------------------------------------------------- + +static inline bool +ViolatesMinimumGap(const SchedulingManager& S, + MachineOpCode opCode, + const cycles_t inCycle) +{ + return (inCycle < S.getEarliestStartTimeForOp(opCode)); +} + + +//--------------------------------------------------------------------------- +// Function: instrIsFeasible +// +// Purpose: +// Check if any issue restrictions would prevent the instruction from +// being issued in the current cycle +//--------------------------------------------------------------------------- + +bool +instrIsFeasible(const SchedulingManager& S, + MachineOpCode opCode) +{ + // skip the instruction if it cannot be issued due to issue restrictions + // caused by previously issued instructions + if (ViolatesMinimumGap(S, opCode, S.getTime())) + return false; + + // skip the instruction if it cannot be issued due to issue restrictions + // caused by previously chosen instructions for the current cycle + if (ConflictsWithChoices(S, opCode)) + return false; + + return true; +} + +//--------------------------------------------------------------------------- +// Function: ScheduleInstructionsWithSSA +// +// Purpose: +// Entry point for instruction scheduling on SSA form. +// Schedules the machine instructions generated by instruction selection. +// Assumes that register allocation has not been done, i.e., operands +// are still in SSA form. +//--------------------------------------------------------------------------- + +bool +ScheduleInstructionsWithSSA(Method* method, + const TargetMachine &target) +{ + SchedGraphSet graphSet(method, target); + + if (SchedDebugLevel >= Sched_PrintSchedGraphs) + { + cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING" + << endl; + graphSet.dump(); + } + + for (SchedGraphSet::const_iterator GI=graphSet.begin(); + GI != graphSet.end(); ++GI) + { + SchedGraph* graph = (*GI).second; + const vector& bbvec = graph->getBasicBlocks(); + assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks"); + const BasicBlock* bb = bbvec[0]; + + if (SchedDebugLevel >= Sched_PrintSchedTrace) + cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n"; + + SchedPriorities schedPrio(method, graph); // expensive! + SchedulingManager S(target, graph, schedPrio); + + ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph + + ForwardListSchedule(S); // computes schedule in S + + RecordSchedule((*GI).first, S); // records schedule in BB + } + + if (SchedDebugLevel >= Sched_PrintMachineCode) + { + cout << endl + << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl; + PrintMachineInstructions(method); + } + + return false; // no reason to fail yet +} + + -- 2.34.1