// 7/23/01 - Vikram Adve - Created
//**************************************************************************/
+
+//************************* User Include Files *****************************/
+
#include "llvm/CodeGen/InstrScheduling.h"
#include "SchedPriorities.h"
#include "llvm/Analysis/LiveVar/BBLiveVar.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Instruction.h"
+
+
+//************************ System Include Files *****************************/
+
#include <hash_set>
#include <algorithm>
#include <iterator>
+
+//************************* External Data Types *****************************/
+
cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags,
"enable instruction scheduling debugging information",
clEnumValN(Sched_NoDebugInfo, "n", "disable debug output"),
clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 0);
+//************************* Internal Data Types *****************************/
+
class InstrSchedule;
class SchedulingManager;
class DelaySlotInfo;
-static void ForwardListSchedule (SchedulingManager& S);
-
-static void RecordSchedule (const BasicBlock* bb,
- const SchedulingManager& S);
-
-static unsigned ChooseOneGroup (SchedulingManager& S);
-
-static void MarkSuccessorsReady (SchedulingManager& S,
- const SchedGraphNode* node);
-
-static unsigned FindSlotChoices (SchedulingManager& S,
- DelaySlotInfo*& getDelaySlotInfo);
-
-static void AssignInstructionsToSlots(class SchedulingManager& S,
- unsigned maxIssue);
-
-static void ScheduleInstr (class SchedulingManager& S,
- const SchedGraphNode* node,
- unsigned int slotNum,
- cycles_t curTime);
-
-static bool ViolatesMinimumGap (const SchedulingManager& S,
- MachineOpCode opCode,
- const cycles_t inCycle);
-
-static bool ConflictsWithChoices (const SchedulingManager& S,
- MachineOpCode opCode);
-
-static void ChooseInstructionsForDelaySlots(SchedulingManager& S,
- const BasicBlock* bb,
- SchedGraph* graph);
-
-static bool NodeCanFillDelaySlot (const SchedulingManager& S,
- const SchedGraphNode* node,
- const SchedGraphNode* brNode,
- bool nodeIsPredecessor);
-
-static void MarkNodeForDelaySlot (SchedulingManager& S,
- SchedGraph* graph,
- SchedGraphNode* node,
- const SchedGraphNode* brNode,
- bool nodeIsPredecessor);
-
-//************************* Internal Data Types *****************************/
-
//----------------------------------------------------------------------
// class InstrGroup:
delayedNodeSlotNum = slotNum;
}
- void scheduleDelayedNode (SchedulingManager& S);
+ unsigned scheduleDelayedNode (SchedulingManager& S);
};
if (createIfMissing)
{
dinfo = new DelaySlotInfo(bn,
- getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode()));
+ getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode()));
delaySlotInfoForBranches[bn] = dinfo;
}
else
}
}
-//************************* External Functions *****************************/
-
+//************************* Internal Functions *****************************/
-//---------------------------------------------------------------------------
-// Function: ScheduleInstructionsWithSSA
-//
-// Purpose:
-// Entry point for instruction scheduling on SSA form.
-// Schedules the machine instructions generated by instruction selection.
-// Assumes that register allocation has not been done, i.e., operands
-// are still in SSA form.
-//---------------------------------------------------------------------------
-bool
-ScheduleInstructionsWithSSA(Method* method,
- const TargetMachine &target)
+static void
+AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
{
- SchedGraphSet graphSet(method, target);
-
- if (SchedDebugLevel >= Sched_PrintSchedGraphs)
- {
- cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING"
- << endl;
- graphSet.dump();
- }
-
- for (SchedGraphSet::const_iterator GI=graphSet.begin();
- GI != graphSet.end(); ++GI)
- {
- SchedGraph* graph = (*GI).second;
- const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks();
- assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
- const BasicBlock* bb = bbvec[0];
-
- if (SchedDebugLevel >= Sched_PrintSchedTrace)
- cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
-
- SchedPriorities schedPrio(method, graph); // expensive!
- SchedulingManager S(target, graph, schedPrio);
-
- ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
-
- ForwardListSchedule(S); // computes schedule in S
-
- RecordSchedule((*GI).first, S); // records schedule in BB
- }
+ // find the slot to start from, in the current cycle
+ unsigned int startSlot = 0;
+ cycles_t curTime = S.getTime();
- if (SchedDebugLevel >= Sched_PrintMachineCode)
- {
- cout << endl
- << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl;
- PrintMachineInstructions(method);
- }
+ assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
- return false; // no reason to fail yet
-}
-
-
-// Check minimum gap requirements relative to instructions scheduled in
-// previous cycles.
-// Note that we do not need to consider `nextEarliestIssueTime' here because
-// that is also captured in the earliest start times for each opcode.
-//
-static inline bool
-ViolatesMinimumGap(const SchedulingManager& S,
- MachineOpCode opCode,
- const cycles_t inCycle)
-{
- return (inCycle < S.getEarliestStartTimeForOp(opCode));
-}
-
-
-// Check if the instruction would conflict with instructions already
-// chosen for the current cycle
-//
-static inline bool
-ConflictsWithChoices(const SchedulingManager& S,
- MachineOpCode opCode)
-{
- // Check if the instruction must issue by itself, and some feasible
- // choices have already been made for this cycle
- if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
- return true;
+ // If only one instruction can be issued, do so.
+ if (maxIssue == 1)
+ for (unsigned s=startSlot; s < S.nslots; s++)
+ if (S.getChoicesForSlot(s).size() > 0)
+ {// found the one instruction
+ S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
+ return;
+ }
- // For each class that opCode belongs to, check if there are too many
- // instructions of that class.
+ // Otherwise, choose from the choices for each slot
//
- const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
- return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
-}
-
-
-// Check if any issue restrictions would prevent the instruction from
-// being issued in the current cycle
-//
-bool
-instrIsFeasible(const SchedulingManager& S,
- MachineOpCode opCode)
-{
- // skip the instruction if it cannot be issued due to issue restrictions
- // caused by previously issued instructions
- if (ViolatesMinimumGap(S, opCode, S.getTime()))
- return false;
-
- // skip the instruction if it cannot be issued due to issue restrictions
- // caused by previously chosen instructions for the current cycle
- if (ConflictsWithChoices(S, opCode))
- return false;
-
- return true;
-}
-
-
-//************************* Internal Functions *****************************/
-
-
-static void
-ForwardListSchedule(SchedulingManager& S)
-{
- unsigned N;
- const SchedGraphNode* node;
-
- S.schedPrio.initialize();
+ InstrGroup* igroup = S.isched.getIGroup(S.getTime());
+ assert(igroup != NULL && "Group creation failed?");
- while ((N = S.schedPrio.getNumReady()) > 0)
+ // Find a slot that has only a single choice, and take it.
+ // If all slots have 0 or multiple choices, pick the first slot with
+ // choices and use its last instruction (just to avoid shifting the vector).
+ unsigned numIssued;
+ for (numIssued = 0; numIssued < maxIssue; numIssued++)
{
- // Choose one group of instructions for a cycle. This will
- // advance S.getTime() to the first cycle instructions can be issued.
- // It may also schedule delay slot instructions in later cycles,
- // but those are ignored here because they are outside the graph.
- //
- unsigned numIssued = ChooseOneGroup(S);
- assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
-
- // Notify the priority manager of scheduled instructions and mark
- // any successors that may now be ready
- //
- const InstrGroup* igroup = S.isched.getIGroup(S.getTime());
- for (unsigned int s=0; s < S.nslots; s++)
- if ((node = (*igroup)[s]) != NULL)
+ int chosenSlot = -1, chosenNodeIndex = -1;
+ for (unsigned s=startSlot; s < S.nslots; s++)
+ if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
{
- S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
- MarkSuccessorsReady(S, node);
+ chosenSlot = (int) s;
+ break;
}
- // Move to the next the next earliest cycle for which
- // an instruction can be issued, or the next earliest in which
- // one will be ready, or to the next cycle, whichever is latest.
- //
- S.updateTime(max(S.getTime() + 1,
- max(S.getEarliestIssueTime(),
- S.schedPrio.getEarliestReadyTime())));
+ if (chosenSlot == -1)
+ for (unsigned s=startSlot; s < S.nslots; s++)
+ if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0)
+ {
+ chosenSlot = (int) s;
+ break;
+ }
+
+ if (chosenSlot != -1)
+ { // Insert the chosen instr in the chosen slot and
+ // erase it from all slots.
+ const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
+ S.scheduleInstr(node, chosenSlot, curTime);
+ }
}
+
+ assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
}
}
-static unsigned
-ChooseOneGroup(SchedulingManager& S)
-{
- assert(S.schedPrio.getNumReady() > 0
- && "Don't get here without ready instructions.");
-
- DelaySlotInfo* getDelaySlotInfo = NULL;
-
- // Choose up to `nslots' feasible instructions and their possible slots.
- unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
-
- while (numIssued == 0)
- {
- S.updateTime(S.getTime()+1);
- numIssued = FindSlotChoices(S, getDelaySlotInfo);
- }
-
- AssignInstructionsToSlots(S, numIssued);
-
- if (getDelaySlotInfo != NULL)
- getDelaySlotInfo->scheduleDelayedNode(S);
-
- // Print trace of scheduled instructions before newly ready ones
- if (SchedDebugLevel >= Sched_PrintSchedTrace)
- {
- cout << " Cycle " << S.getTime() << " : Scheduled instructions:\n";
- const InstrGroup* igroup = S.isched.getIGroup(S.getTime());
- for (unsigned int s=0; s < S.nslots; s++)
- {
- cout << " ";
- if ((*igroup)[s] != NULL)
- cout << * ((*igroup)[s])->getMachineInstr() << endl;
- else
- cout << "<none>" << endl;
- }
- }
-
- return numIssued;
-}
-
static void
MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node)
{
// Try to assign every other instruction to a lower numbered
// slot than delayedNodeSlot.
- MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
+ MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
bool noSlotFound = true;
unsigned int s;
for (s=startSlot; s < delayedNodeSlot; s++)
for (unsigned i=0;
i < S.getNumChoices() && i < indexForBreakingNode; i++)
{
- MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
+ MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
// If a higher priority instruction cannot be assigned to
// any earlier slots, don't schedule the breaking instruction.
}
-static void
-AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
+static unsigned
+ChooseOneGroup(SchedulingManager& S)
{
- // find the slot to start from, in the current cycle
- unsigned int startSlot = 0;
- cycles_t curTime = S.getTime();
-
- assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
+ assert(S.schedPrio.getNumReady() > 0
+ && "Don't get here without ready instructions.");
- // If only one instruction can be issued, do so.
- if (maxIssue == 1)
- for (unsigned s=startSlot; s < S.nslots; s++)
- if (S.getChoicesForSlot(s).size() > 0)
- {// found the one instruction
- S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
- return;
- }
+ cycles_t firstCycle = S.getTime();
+ DelaySlotInfo* getDelaySlotInfo = NULL;
- // Otherwise, choose from the choices for each slot
- //
- InstrGroup* igroup = S.isched.getIGroup(S.getTime());
- assert(igroup != NULL && "Group creation failed?");
+ // Choose up to `nslots' feasible instructions and their possible slots.
+ unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
- // Find a slot that has only a single choice, and take it.
- // If all slots have 0 or multiple choices, pick the first slot with
- // choices and use its last instruction (just to avoid shifting the vector).
- unsigned numIssued;
- for (numIssued = 0; numIssued < maxIssue; numIssued++)
+ while (numIssued == 0)
{
- int chosenSlot = -1, chosenNodeIndex = -1;
- for (unsigned s=startSlot; s < S.nslots; s++)
- if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
- {
- chosenSlot = (int) s;
- break;
- }
-
- if (chosenSlot == -1)
- for (unsigned s=startSlot; s < S.nslots; s++)
- if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0)
- {
- chosenSlot = (int) s;
- break;
- }
-
- if (chosenSlot != -1)
- { // Insert the chosen instr in the chosen slot and
- // erase it from all slots.
- const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
- S.scheduleInstr(node, chosenSlot, curTime);
- }
+ S.updateTime(S.getTime()+1);
+ numIssued = FindSlotChoices(S, getDelaySlotInfo);
}
- assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
+ AssignInstructionsToSlots(S, numIssued);
+
+ if (getDelaySlotInfo != NULL)
+ numIssued += getDelaySlotInfo->scheduleDelayedNode(S);
+
+ // Print trace of scheduled instructions before newly ready ones
+ if (SchedDebugLevel >= Sched_PrintSchedTrace)
+ {
+ for (cycles_t c = firstCycle; c <= S.getTime(); c++)
+ {
+ cout << " Cycle " << c << " : Scheduled instructions:\n";
+ const InstrGroup* igroup = S.isched.getIGroup(c);
+ for (unsigned int s=0; s < S.nslots; s++)
+ {
+ cout << " ";
+ if ((*igroup)[s] != NULL)
+ cout << * ((*igroup)[s])->getMachineInstr() << endl;
+ else
+ cout << "<none>" << endl;
+ }
+ }
+ }
+
+ return numIssued;
}
+static void
+ForwardListSchedule(SchedulingManager& S)
+{
+ unsigned N;
+ const SchedGraphNode* node;
+
+ S.schedPrio.initialize();
+
+ while ((N = S.schedPrio.getNumReady()) > 0)
+ {
+ cycles_t nextCycle = S.getTime();
+
+ // Choose one group of instructions for a cycle, plus any delay slot
+ // instructions (which may overflow into successive cycles).
+ // This will advance S.getTime() to the last cycle in which
+ // instructions are actually issued.
+ //
+ unsigned numIssued = ChooseOneGroup(S);
+ assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
+
+ // Notify the priority manager of scheduled instructions and mark
+ // any successors that may now be ready
+ //
+ for (cycles_t c = nextCycle; c <= S.getTime(); c++)
+ {
+ const InstrGroup* igroup = S.isched.getIGroup(c);
+ for (unsigned int s=0; s < S.nslots; s++)
+ if ((node = (*igroup)[s]) != NULL)
+ {
+ S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
+ MarkSuccessorsReady(S, node);
+ }
+ }
+
+ // Move to the next the next earliest cycle for which
+ // an instruction can be issued, or the next earliest in which
+ // one will be ready, or to the next cycle, whichever is latest.
+ //
+ S.updateTime(max(S.getTime() + 1,
+ max(S.getEarliestIssueTime(),
+ S.schedPrio.getEarliestReadyTime())));
+ }
+}
+
//---------------------------------------------------------------------
// Code for filling delay slots for delayed terminator instructions
// when we cannot find single-cycle instructions that can be reordered.
//----------------------------------------------------------------------
-static void
-ChooseInstructionsForDelaySlots(SchedulingManager& S,
- const BasicBlock* bb,
- SchedGraph* graph)
-{
- // Look for instructions that can be used for delay slots.
- // Remove them from the graph, and mark them to be used for delay slots.
- const MachineInstrInfo& mii = S.getInstrInfo();
- const TerminatorInst* term = bb->getTerminator();
- MachineCodeForVMInstr& termMvec = term->getMachineInstrVec();
-
- // Find the first branch instr in the sequence of machine instrs for term
- //
- unsigned first = 0;
- while (! mii.isBranch(termMvec[first]->getOpCode()))
- ++first;
- assert(first < termMvec.size() &&
- "No branch instructions for BR? Ok, but weird! Delete assertion.");
- if (first == termMvec.size())
- return;
-
- SchedGraphNode* brNode = graph->getGraphNodeForInstr(termMvec[first]);
- assert(! mii.isCall(brNode->getMachineInstr()->getOpCode()) && "Call used as terminator?");
-
- unsigned ndelays = mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode());
- if (ndelays == 0)
- return;
-
- // Use vectors to remember the nodes chosen for delay slots, and the
- // NOPs that will be unused. We cannot remove them from the graph while
- // walking through the preds and succs of the brNode here, so we
- // remember the nodes in the vectors and remove them later.
- // We use separate vectors for the single-cycle and multi-cycle nodes,
- // so that we can give preference to single-cycle nodes.
- //
- vector<SchedGraphNode*> sdelayNodeVec;
- vector<SchedGraphNode*> mdelayNodeVec;
- vector<SchedGraphNode*> nopNodeVec;
- unsigned numUseful = 0;
-
- sdelayNodeVec.reserve(ndelays);
-
- for (sg_pred_iterator P = pred_begin(brNode);
- P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
- if (! (*P)->isDummyNode() &&
- ! mii.isNop((*P)->getMachineInstr()->getOpCode()) &&
- NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
- {
- ++numUseful;
- if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1)
- mdelayNodeVec.push_back(*P);
- else
- sdelayNodeVec.push_back(*P);
- }
-
- // If not enough single-cycle instructions were found, select the
- // lowest-latency multi-cycle instructions and use them.
- // Note that this is the most efficient code when only 1 (or even 2)
- // values need to be selected.
- //
- while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
- {
- unsigned latency;
- unsigned minLatency = mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode());
- unsigned minIndex = 0;
- for (unsigned i=1; i < mdelayNodeVec.size(); i++)
- if (minLatency >=
- (latency = mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode())))
- {
- minLatency = latency;
- minIndex = i;
- }
- sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
- if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
- mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
- }
-
- // Now, remove the NOPs currently in delay slots from the graph.
- // If not enough useful instructions were found, use the NOPs to
- // fill delay slots, otherwise, just discard them.
- for (sg_succ_iterator I=succ_begin(brNode); I != succ_end(brNode); ++I)
- if (! (*I)->isDummyNode()
- && mii.isNop((*I)->getMachineInstr()->getOpCode()))
- {
- if (sdelayNodeVec.size() < ndelays)
- sdelayNodeVec.push_back(*I);
- else
- nopNodeVec.push_back(*I);
- }
-
- // Mark the nodes chosen for delay slots. This removes them from the graph.
- for (unsigned i=0; i < sdelayNodeVec.size(); i++)
- MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], brNode, true);
-
- // And remove the unused NOPs from the graph.
- for (unsigned i=0; i < nopNodeVec.size(); i++)
- graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
-}
-
-
-bool
+static bool
NodeCanFillDelaySlot(const SchedulingManager& S,
const SchedGraphNode* node,
const SchedGraphNode* brNode,
}
-void
+static void
MarkNodeForDelaySlot(SchedulingManager& S,
SchedGraph* graph,
SchedGraphNode* node,
}
+void
+FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
+ SchedGraphNode* brNode,
+ vector<SchedGraphNode*>& sdelayNodeVec)
+{
+ const MachineInstrInfo& mii = S.getInstrInfo();
+ unsigned ndelays =
+ mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode());
+
+ if (ndelays == 0)
+ return;
+
+ sdelayNodeVec.reserve(ndelays);
+
+ // Use a separate vector to hold the feasible multi-cycle nodes.
+ // These will be used if not enough single-cycle nodes are found.
+ //
+ vector<SchedGraphNode*> mdelayNodeVec;
+
+ for (sg_pred_iterator P = pred_begin(brNode);
+ P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
+ if (! (*P)->isDummyNode() &&
+ ! mii.isNop((*P)->getMachineInstr()->getOpCode()) &&
+ NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
+ {
+ if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1)
+ mdelayNodeVec.push_back(*P);
+ else
+ sdelayNodeVec.push_back(*P);
+ }
+
+ // If not enough single-cycle instructions were found, select the
+ // lowest-latency multi-cycle instructions and use them.
+ // Note that this is the most efficient code when only 1 (or even 2)
+ // values need to be selected.
+ //
+ while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
+ {
+ unsigned lmin =
+ mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode());
+ unsigned minIndex = 0;
+ for (unsigned i=1; i < mdelayNodeVec.size(); i++)
+ {
+ unsigned li =
+ mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode());
+ if (lmin >= li)
+ {
+ lmin = li;
+ minIndex = i;
+ }
+ }
+ sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
+ if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
+ mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
+ }
+}
+
+
+// Remove the NOPs currently in delay slots from the graph.
+// Mark instructions specified in sdelayNodeVec to replace them.
+// If not enough useful instructions were found, mark the NOPs to be used
+// for filling delay slots, otherwise, otherwise just discard them.
+//
+void
+ReplaceNopsWithUsefulInstr(SchedulingManager& S,
+ SchedGraphNode* node,
+ vector<SchedGraphNode*> sdelayNodeVec,
+ SchedGraph* graph)
+{
+ vector<SchedGraphNode*> nopNodeVec;
+ const MachineInstrInfo& mii = S.getInstrInfo();
+ unsigned ndelays= mii.getNumDelaySlots(node->getMachineInstr()->getOpCode());
+ assert(ndelays > 0 && "Unnecessary call to replace NOPs");
+
+ // Remove the NOPs currently in delay slots from the graph.
+ // If not enough useful instructions were found, use the NOPs to
+ // fill delay slots, otherwise, just discard them.
+ for (sg_succ_iterator I=succ_begin(node); I != succ_end(node); ++I)
+ if (! (*I)->isDummyNode()
+ && mii.isNop((*I)->getMachineInstr()->getOpCode()))
+ {
+ if (sdelayNodeVec.size() < ndelays)
+ sdelayNodeVec.push_back(*I);
+ else
+ nopNodeVec.push_back(*I);
+ }
+ assert(sdelayNodeVec.size() == ndelays);
+
+ // Mark the nodes chosen for delay slots. This removes them from the graph.
+ for (unsigned i=0; i < sdelayNodeVec.size(); i++)
+ MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true);
+
+ // And remove the unused NOPs from the graph.
+ for (unsigned i=0; i < nopNodeVec.size(); i++)
+ graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
+}
+
+
+// For all delayed instructions, choose instructions to put in the delay
+// slots and pull those out of the graph. Mark them for the delay slots
+// in the DelaySlotInfo object for that graph node. If no useful work
+// is found for a delay slot, use the NOP that is currently in that slot.
+//
+// We try to fill the delay slots with useful work for all instructions
+// except CALLs. For CALLs, it is nearly always possible to use one of the
+// call sequence instrs and putting anything else in the delay slot could be
+// suboptimal.
+//
+static void
+ChooseInstructionsForDelaySlots(SchedulingManager& S,
+ const BasicBlock* bb,
+ SchedGraph* graph)
+{
+ const MachineInstrInfo& mii = S.getInstrInfo();
+ const TerminatorInst* termInstr = bb->getTerminator();
+ MachineCodeForVMInstr& termMvec = termInstr->getMachineInstrVec();
+ vector<SchedGraphNode*> delayNodeVec;
+ const MachineInstr* brInstr;
+
+ assert(termInstr->getOpcode() != Instruction::Call
+ && "Call used as terminator?");
+
+ // To find instructions that need delay slots without searching the entire
+ // machine code, we assume the only delayed instructions are CALLs or
+ // instructions generated for the terminator inst.
+ // Find the first branch instr in the sequence of machine instrs for term
+ //
+ unsigned first = 0;
+ while (first < termMvec.size() &&
+ ! mii.isBranch(termMvec[first]->getOpCode()))
+ {
+ ++first;
+ }
+ assert(first < termMvec.size() &&
+ "No branch instructions for BR? Ok, but weird! Delete assertion.");
+
+ brInstr = (first < termMvec.size())? termMvec[first] : NULL;
+
+ // Compute a vector of the nodes chosen for delay slots and then
+ // mark delay slots to replace NOPs with these useful instructions.
+ //
+ if (brInstr != NULL)
+ {
+ SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr);
+ FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec);
+ ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph);
+ }
+
+ // Also mark delay slots for other delayed instructions to hold NOPs.
+ // Simply passing in an empty delayNodeVec will have this effect.
+ //
+ delayNodeVec.clear();
+ const MachineCodeForBasicBlock& bbMvec = bb->getMachineInstrVec();
+ for (unsigned i=0; i < bbMvec.size(); i++)
+ if (bbMvec[i] != brInstr &&
+ mii.getNumDelaySlots(bbMvec[i]->getOpCode()) > 0)
+ {
+ SchedGraphNode* node = graph->getGraphNodeForInstr(bbMvec[i]);
+ ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph);
+ }
+}
+
+
//
// Schedule the delayed branch and its delay slots
//
-void
+unsigned
DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
{
assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch");
S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime);
break;
}
+
+ return 1 + ndelays;
+}
+
+
+// Check if the instruction would conflict with instructions already
+// chosen for the current cycle
+//
+static inline bool
+ConflictsWithChoices(const SchedulingManager& S,
+ MachineOpCode opCode)
+{
+ // Check if the instruction must issue by itself, and some feasible
+ // choices have already been made for this cycle
+ if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
+ return true;
+
+ // For each class that opCode belongs to, check if there are too many
+ // instructions of that class.
+ //
+ const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
+ return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
}
+
+//************************* External Functions *****************************/
+
+
+//---------------------------------------------------------------------------
+// Function: ViolatesMinimumGap
+//
+// Purpose:
+// Check minimum gap requirements relative to instructions scheduled in
+// previous cycles.
+// Note that we do not need to consider `nextEarliestIssueTime' here because
+// that is also captured in the earliest start times for each opcode.
+//---------------------------------------------------------------------------
+
+static inline bool
+ViolatesMinimumGap(const SchedulingManager& S,
+ MachineOpCode opCode,
+ const cycles_t inCycle)
+{
+ return (inCycle < S.getEarliestStartTimeForOp(opCode));
+}
+
+
+//---------------------------------------------------------------------------
+// Function: instrIsFeasible
+//
+// Purpose:
+// Check if any issue restrictions would prevent the instruction from
+// being issued in the current cycle
+//---------------------------------------------------------------------------
+
+bool
+instrIsFeasible(const SchedulingManager& S,
+ MachineOpCode opCode)
+{
+ // skip the instruction if it cannot be issued due to issue restrictions
+ // caused by previously issued instructions
+ if (ViolatesMinimumGap(S, opCode, S.getTime()))
+ return false;
+
+ // skip the instruction if it cannot be issued due to issue restrictions
+ // caused by previously chosen instructions for the current cycle
+ if (ConflictsWithChoices(S, opCode))
+ return false;
+
+ return true;
+}
+
+//---------------------------------------------------------------------------
+// Function: ScheduleInstructionsWithSSA
+//
+// Purpose:
+// Entry point for instruction scheduling on SSA form.
+// Schedules the machine instructions generated by instruction selection.
+// Assumes that register allocation has not been done, i.e., operands
+// are still in SSA form.
+//---------------------------------------------------------------------------
+
+bool
+ScheduleInstructionsWithSSA(Method* method,
+ const TargetMachine &target)
+{
+ SchedGraphSet graphSet(method, target);
+
+ if (SchedDebugLevel >= Sched_PrintSchedGraphs)
+ {
+ cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING"
+ << endl;
+ graphSet.dump();
+ }
+
+ for (SchedGraphSet::const_iterator GI=graphSet.begin();
+ GI != graphSet.end(); ++GI)
+ {
+ SchedGraph* graph = (*GI).second;
+ const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks();
+ assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
+ const BasicBlock* bb = bbvec[0];
+
+ if (SchedDebugLevel >= Sched_PrintSchedTrace)
+ cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
+
+ SchedPriorities schedPrio(method, graph); // expensive!
+ SchedulingManager S(target, graph, schedPrio);
+
+ ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
+
+ ForwardListSchedule(S); // computes schedule in S
+
+ RecordSchedule((*GI).first, S); // records schedule in BB
+ }
+
+ if (SchedDebugLevel >= Sched_PrintMachineCode)
+ {
+ cout << endl
+ << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl;
+ PrintMachineInstructions(method);
+ }
+
+ return false; // no reason to fail yet
+}
+
+
// 7/23/01 - Vikram Adve - Created
//**************************************************************************/
+
+//************************* User Include Files *****************************/
+
#include "llvm/CodeGen/InstrScheduling.h"
#include "SchedPriorities.h"
#include "llvm/Analysis/LiveVar/BBLiveVar.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Instruction.h"
+
+
+//************************ System Include Files *****************************/
+
#include <hash_set>
#include <algorithm>
#include <iterator>
+
+//************************* External Data Types *****************************/
+
cl::Enum<enum SchedDebugLevel_t> SchedDebugLevel("dsched", cl::NoFlags,
"enable instruction scheduling debugging information",
clEnumValN(Sched_NoDebugInfo, "n", "disable debug output"),
clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"), 0);
+//************************* Internal Data Types *****************************/
+
class InstrSchedule;
class SchedulingManager;
class DelaySlotInfo;
-static void ForwardListSchedule (SchedulingManager& S);
-
-static void RecordSchedule (const BasicBlock* bb,
- const SchedulingManager& S);
-
-static unsigned ChooseOneGroup (SchedulingManager& S);
-
-static void MarkSuccessorsReady (SchedulingManager& S,
- const SchedGraphNode* node);
-
-static unsigned FindSlotChoices (SchedulingManager& S,
- DelaySlotInfo*& getDelaySlotInfo);
-
-static void AssignInstructionsToSlots(class SchedulingManager& S,
- unsigned maxIssue);
-
-static void ScheduleInstr (class SchedulingManager& S,
- const SchedGraphNode* node,
- unsigned int slotNum,
- cycles_t curTime);
-
-static bool ViolatesMinimumGap (const SchedulingManager& S,
- MachineOpCode opCode,
- const cycles_t inCycle);
-
-static bool ConflictsWithChoices (const SchedulingManager& S,
- MachineOpCode opCode);
-
-static void ChooseInstructionsForDelaySlots(SchedulingManager& S,
- const BasicBlock* bb,
- SchedGraph* graph);
-
-static bool NodeCanFillDelaySlot (const SchedulingManager& S,
- const SchedGraphNode* node,
- const SchedGraphNode* brNode,
- bool nodeIsPredecessor);
-
-static void MarkNodeForDelaySlot (SchedulingManager& S,
- SchedGraph* graph,
- SchedGraphNode* node,
- const SchedGraphNode* brNode,
- bool nodeIsPredecessor);
-
-//************************* Internal Data Types *****************************/
-
//----------------------------------------------------------------------
// class InstrGroup:
delayedNodeSlotNum = slotNum;
}
- void scheduleDelayedNode (SchedulingManager& S);
+ unsigned scheduleDelayedNode (SchedulingManager& S);
};
if (createIfMissing)
{
dinfo = new DelaySlotInfo(bn,
- getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode()));
+ getInstrInfo().getNumDelaySlots(bn->getMachineInstr()->getOpCode()));
delaySlotInfoForBranches[bn] = dinfo;
}
else
}
}
-//************************* External Functions *****************************/
-
+//************************* Internal Functions *****************************/
-//---------------------------------------------------------------------------
-// Function: ScheduleInstructionsWithSSA
-//
-// Purpose:
-// Entry point for instruction scheduling on SSA form.
-// Schedules the machine instructions generated by instruction selection.
-// Assumes that register allocation has not been done, i.e., operands
-// are still in SSA form.
-//---------------------------------------------------------------------------
-bool
-ScheduleInstructionsWithSSA(Method* method,
- const TargetMachine &target)
+static void
+AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
{
- SchedGraphSet graphSet(method, target);
-
- if (SchedDebugLevel >= Sched_PrintSchedGraphs)
- {
- cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING"
- << endl;
- graphSet.dump();
- }
-
- for (SchedGraphSet::const_iterator GI=graphSet.begin();
- GI != graphSet.end(); ++GI)
- {
- SchedGraph* graph = (*GI).second;
- const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks();
- assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
- const BasicBlock* bb = bbvec[0];
-
- if (SchedDebugLevel >= Sched_PrintSchedTrace)
- cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
-
- SchedPriorities schedPrio(method, graph); // expensive!
- SchedulingManager S(target, graph, schedPrio);
-
- ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
-
- ForwardListSchedule(S); // computes schedule in S
-
- RecordSchedule((*GI).first, S); // records schedule in BB
- }
+ // find the slot to start from, in the current cycle
+ unsigned int startSlot = 0;
+ cycles_t curTime = S.getTime();
- if (SchedDebugLevel >= Sched_PrintMachineCode)
- {
- cout << endl
- << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl;
- PrintMachineInstructions(method);
- }
+ assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
- return false; // no reason to fail yet
-}
-
-
-// Check minimum gap requirements relative to instructions scheduled in
-// previous cycles.
-// Note that we do not need to consider `nextEarliestIssueTime' here because
-// that is also captured in the earliest start times for each opcode.
-//
-static inline bool
-ViolatesMinimumGap(const SchedulingManager& S,
- MachineOpCode opCode,
- const cycles_t inCycle)
-{
- return (inCycle < S.getEarliestStartTimeForOp(opCode));
-}
-
-
-// Check if the instruction would conflict with instructions already
-// chosen for the current cycle
-//
-static inline bool
-ConflictsWithChoices(const SchedulingManager& S,
- MachineOpCode opCode)
-{
- // Check if the instruction must issue by itself, and some feasible
- // choices have already been made for this cycle
- if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
- return true;
+ // If only one instruction can be issued, do so.
+ if (maxIssue == 1)
+ for (unsigned s=startSlot; s < S.nslots; s++)
+ if (S.getChoicesForSlot(s).size() > 0)
+ {// found the one instruction
+ S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
+ return;
+ }
- // For each class that opCode belongs to, check if there are too many
- // instructions of that class.
+ // Otherwise, choose from the choices for each slot
//
- const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
- return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
-}
-
-
-// Check if any issue restrictions would prevent the instruction from
-// being issued in the current cycle
-//
-bool
-instrIsFeasible(const SchedulingManager& S,
- MachineOpCode opCode)
-{
- // skip the instruction if it cannot be issued due to issue restrictions
- // caused by previously issued instructions
- if (ViolatesMinimumGap(S, opCode, S.getTime()))
- return false;
-
- // skip the instruction if it cannot be issued due to issue restrictions
- // caused by previously chosen instructions for the current cycle
- if (ConflictsWithChoices(S, opCode))
- return false;
-
- return true;
-}
-
-
-//************************* Internal Functions *****************************/
-
-
-static void
-ForwardListSchedule(SchedulingManager& S)
-{
- unsigned N;
- const SchedGraphNode* node;
-
- S.schedPrio.initialize();
+ InstrGroup* igroup = S.isched.getIGroup(S.getTime());
+ assert(igroup != NULL && "Group creation failed?");
- while ((N = S.schedPrio.getNumReady()) > 0)
+ // Find a slot that has only a single choice, and take it.
+ // If all slots have 0 or multiple choices, pick the first slot with
+ // choices and use its last instruction (just to avoid shifting the vector).
+ unsigned numIssued;
+ for (numIssued = 0; numIssued < maxIssue; numIssued++)
{
- // Choose one group of instructions for a cycle. This will
- // advance S.getTime() to the first cycle instructions can be issued.
- // It may also schedule delay slot instructions in later cycles,
- // but those are ignored here because they are outside the graph.
- //
- unsigned numIssued = ChooseOneGroup(S);
- assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
-
- // Notify the priority manager of scheduled instructions and mark
- // any successors that may now be ready
- //
- const InstrGroup* igroup = S.isched.getIGroup(S.getTime());
- for (unsigned int s=0; s < S.nslots; s++)
- if ((node = (*igroup)[s]) != NULL)
+ int chosenSlot = -1, chosenNodeIndex = -1;
+ for (unsigned s=startSlot; s < S.nslots; s++)
+ if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
{
- S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
- MarkSuccessorsReady(S, node);
+ chosenSlot = (int) s;
+ break;
}
- // Move to the next the next earliest cycle for which
- // an instruction can be issued, or the next earliest in which
- // one will be ready, or to the next cycle, whichever is latest.
- //
- S.updateTime(max(S.getTime() + 1,
- max(S.getEarliestIssueTime(),
- S.schedPrio.getEarliestReadyTime())));
+ if (chosenSlot == -1)
+ for (unsigned s=startSlot; s < S.nslots; s++)
+ if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0)
+ {
+ chosenSlot = (int) s;
+ break;
+ }
+
+ if (chosenSlot != -1)
+ { // Insert the chosen instr in the chosen slot and
+ // erase it from all slots.
+ const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
+ S.scheduleInstr(node, chosenSlot, curTime);
+ }
}
+
+ assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
}
}
-static unsigned
-ChooseOneGroup(SchedulingManager& S)
-{
- assert(S.schedPrio.getNumReady() > 0
- && "Don't get here without ready instructions.");
-
- DelaySlotInfo* getDelaySlotInfo = NULL;
-
- // Choose up to `nslots' feasible instructions and their possible slots.
- unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
-
- while (numIssued == 0)
- {
- S.updateTime(S.getTime()+1);
- numIssued = FindSlotChoices(S, getDelaySlotInfo);
- }
-
- AssignInstructionsToSlots(S, numIssued);
-
- if (getDelaySlotInfo != NULL)
- getDelaySlotInfo->scheduleDelayedNode(S);
-
- // Print trace of scheduled instructions before newly ready ones
- if (SchedDebugLevel >= Sched_PrintSchedTrace)
- {
- cout << " Cycle " << S.getTime() << " : Scheduled instructions:\n";
- const InstrGroup* igroup = S.isched.getIGroup(S.getTime());
- for (unsigned int s=0; s < S.nslots; s++)
- {
- cout << " ";
- if ((*igroup)[s] != NULL)
- cout << * ((*igroup)[s])->getMachineInstr() << endl;
- else
- cout << "<none>" << endl;
- }
- }
-
- return numIssued;
-}
-
static void
MarkSuccessorsReady(SchedulingManager& S, const SchedGraphNode* node)
{
// Try to assign every other instruction to a lower numbered
// slot than delayedNodeSlot.
- MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
+ MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
bool noSlotFound = true;
unsigned int s;
for (s=startSlot; s < delayedNodeSlot; s++)
for (unsigned i=0;
i < S.getNumChoices() && i < indexForBreakingNode; i++)
{
- MachineOpCode opCode = S.getChoice(i)->getMachineInstr()->getOpCode();
+ MachineOpCode opCode =S.getChoice(i)->getMachineInstr()->getOpCode();
// If a higher priority instruction cannot be assigned to
// any earlier slots, don't schedule the breaking instruction.
}
-static void
-AssignInstructionsToSlots(class SchedulingManager& S, unsigned maxIssue)
+static unsigned
+ChooseOneGroup(SchedulingManager& S)
{
- // find the slot to start from, in the current cycle
- unsigned int startSlot = 0;
- cycles_t curTime = S.getTime();
-
- assert(maxIssue > 0 && maxIssue <= S.nslots - startSlot);
+ assert(S.schedPrio.getNumReady() > 0
+ && "Don't get here without ready instructions.");
- // If only one instruction can be issued, do so.
- if (maxIssue == 1)
- for (unsigned s=startSlot; s < S.nslots; s++)
- if (S.getChoicesForSlot(s).size() > 0)
- {// found the one instruction
- S.scheduleInstr(*S.getChoicesForSlot(s).begin(), s, curTime);
- return;
- }
+ cycles_t firstCycle = S.getTime();
+ DelaySlotInfo* getDelaySlotInfo = NULL;
- // Otherwise, choose from the choices for each slot
- //
- InstrGroup* igroup = S.isched.getIGroup(S.getTime());
- assert(igroup != NULL && "Group creation failed?");
+ // Choose up to `nslots' feasible instructions and their possible slots.
+ unsigned numIssued = FindSlotChoices(S, getDelaySlotInfo);
- // Find a slot that has only a single choice, and take it.
- // If all slots have 0 or multiple choices, pick the first slot with
- // choices and use its last instruction (just to avoid shifting the vector).
- unsigned numIssued;
- for (numIssued = 0; numIssued < maxIssue; numIssued++)
+ while (numIssued == 0)
{
- int chosenSlot = -1, chosenNodeIndex = -1;
- for (unsigned s=startSlot; s < S.nslots; s++)
- if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() == 1)
- {
- chosenSlot = (int) s;
- break;
- }
-
- if (chosenSlot == -1)
- for (unsigned s=startSlot; s < S.nslots; s++)
- if ((*igroup)[s] == NULL && S.getChoicesForSlot(s).size() > 0)
- {
- chosenSlot = (int) s;
- break;
- }
-
- if (chosenSlot != -1)
- { // Insert the chosen instr in the chosen slot and
- // erase it from all slots.
- const SchedGraphNode* node= *S.getChoicesForSlot(chosenSlot).begin();
- S.scheduleInstr(node, chosenSlot, curTime);
- }
+ S.updateTime(S.getTime()+1);
+ numIssued = FindSlotChoices(S, getDelaySlotInfo);
}
- assert(numIssued > 0 && "Should not happen when maxIssue > 0!");
+ AssignInstructionsToSlots(S, numIssued);
+
+ if (getDelaySlotInfo != NULL)
+ numIssued += getDelaySlotInfo->scheduleDelayedNode(S);
+
+ // Print trace of scheduled instructions before newly ready ones
+ if (SchedDebugLevel >= Sched_PrintSchedTrace)
+ {
+ for (cycles_t c = firstCycle; c <= S.getTime(); c++)
+ {
+ cout << " Cycle " << c << " : Scheduled instructions:\n";
+ const InstrGroup* igroup = S.isched.getIGroup(c);
+ for (unsigned int s=0; s < S.nslots; s++)
+ {
+ cout << " ";
+ if ((*igroup)[s] != NULL)
+ cout << * ((*igroup)[s])->getMachineInstr() << endl;
+ else
+ cout << "<none>" << endl;
+ }
+ }
+ }
+
+ return numIssued;
}
+static void
+ForwardListSchedule(SchedulingManager& S)
+{
+ unsigned N;
+ const SchedGraphNode* node;
+
+ S.schedPrio.initialize();
+
+ while ((N = S.schedPrio.getNumReady()) > 0)
+ {
+ cycles_t nextCycle = S.getTime();
+
+ // Choose one group of instructions for a cycle, plus any delay slot
+ // instructions (which may overflow into successive cycles).
+ // This will advance S.getTime() to the last cycle in which
+ // instructions are actually issued.
+ //
+ unsigned numIssued = ChooseOneGroup(S);
+ assert(numIssued > 0 && "Deadlock in list scheduling algorithm?");
+
+ // Notify the priority manager of scheduled instructions and mark
+ // any successors that may now be ready
+ //
+ for (cycles_t c = nextCycle; c <= S.getTime(); c++)
+ {
+ const InstrGroup* igroup = S.isched.getIGroup(c);
+ for (unsigned int s=0; s < S.nslots; s++)
+ if ((node = (*igroup)[s]) != NULL)
+ {
+ S.schedPrio.issuedReadyNodeAt(S.getTime(), node);
+ MarkSuccessorsReady(S, node);
+ }
+ }
+
+ // Move to the next the next earliest cycle for which
+ // an instruction can be issued, or the next earliest in which
+ // one will be ready, or to the next cycle, whichever is latest.
+ //
+ S.updateTime(max(S.getTime() + 1,
+ max(S.getEarliestIssueTime(),
+ S.schedPrio.getEarliestReadyTime())));
+ }
+}
+
//---------------------------------------------------------------------
// Code for filling delay slots for delayed terminator instructions
// when we cannot find single-cycle instructions that can be reordered.
//----------------------------------------------------------------------
-static void
-ChooseInstructionsForDelaySlots(SchedulingManager& S,
- const BasicBlock* bb,
- SchedGraph* graph)
-{
- // Look for instructions that can be used for delay slots.
- // Remove them from the graph, and mark them to be used for delay slots.
- const MachineInstrInfo& mii = S.getInstrInfo();
- const TerminatorInst* term = bb->getTerminator();
- MachineCodeForVMInstr& termMvec = term->getMachineInstrVec();
-
- // Find the first branch instr in the sequence of machine instrs for term
- //
- unsigned first = 0;
- while (! mii.isBranch(termMvec[first]->getOpCode()))
- ++first;
- assert(first < termMvec.size() &&
- "No branch instructions for BR? Ok, but weird! Delete assertion.");
- if (first == termMvec.size())
- return;
-
- SchedGraphNode* brNode = graph->getGraphNodeForInstr(termMvec[first]);
- assert(! mii.isCall(brNode->getMachineInstr()->getOpCode()) && "Call used as terminator?");
-
- unsigned ndelays = mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode());
- if (ndelays == 0)
- return;
-
- // Use vectors to remember the nodes chosen for delay slots, and the
- // NOPs that will be unused. We cannot remove them from the graph while
- // walking through the preds and succs of the brNode here, so we
- // remember the nodes in the vectors and remove them later.
- // We use separate vectors for the single-cycle and multi-cycle nodes,
- // so that we can give preference to single-cycle nodes.
- //
- vector<SchedGraphNode*> sdelayNodeVec;
- vector<SchedGraphNode*> mdelayNodeVec;
- vector<SchedGraphNode*> nopNodeVec;
- unsigned numUseful = 0;
-
- sdelayNodeVec.reserve(ndelays);
-
- for (sg_pred_iterator P = pred_begin(brNode);
- P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
- if (! (*P)->isDummyNode() &&
- ! mii.isNop((*P)->getMachineInstr()->getOpCode()) &&
- NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
- {
- ++numUseful;
- if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1)
- mdelayNodeVec.push_back(*P);
- else
- sdelayNodeVec.push_back(*P);
- }
-
- // If not enough single-cycle instructions were found, select the
- // lowest-latency multi-cycle instructions and use them.
- // Note that this is the most efficient code when only 1 (or even 2)
- // values need to be selected.
- //
- while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
- {
- unsigned latency;
- unsigned minLatency = mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode());
- unsigned minIndex = 0;
- for (unsigned i=1; i < mdelayNodeVec.size(); i++)
- if (minLatency >=
- (latency = mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode())))
- {
- minLatency = latency;
- minIndex = i;
- }
- sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
- if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
- mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
- }
-
- // Now, remove the NOPs currently in delay slots from the graph.
- // If not enough useful instructions were found, use the NOPs to
- // fill delay slots, otherwise, just discard them.
- for (sg_succ_iterator I=succ_begin(brNode); I != succ_end(brNode); ++I)
- if (! (*I)->isDummyNode()
- && mii.isNop((*I)->getMachineInstr()->getOpCode()))
- {
- if (sdelayNodeVec.size() < ndelays)
- sdelayNodeVec.push_back(*I);
- else
- nopNodeVec.push_back(*I);
- }
-
- // Mark the nodes chosen for delay slots. This removes them from the graph.
- for (unsigned i=0; i < sdelayNodeVec.size(); i++)
- MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], brNode, true);
-
- // And remove the unused NOPs from the graph.
- for (unsigned i=0; i < nopNodeVec.size(); i++)
- graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
-}
-
-
-bool
+static bool
NodeCanFillDelaySlot(const SchedulingManager& S,
const SchedGraphNode* node,
const SchedGraphNode* brNode,
}
-void
+static void
MarkNodeForDelaySlot(SchedulingManager& S,
SchedGraph* graph,
SchedGraphNode* node,
}
+void
+FindUsefulInstructionsForDelaySlots(SchedulingManager& S,
+ SchedGraphNode* brNode,
+ vector<SchedGraphNode*>& sdelayNodeVec)
+{
+ const MachineInstrInfo& mii = S.getInstrInfo();
+ unsigned ndelays =
+ mii.getNumDelaySlots(brNode->getMachineInstr()->getOpCode());
+
+ if (ndelays == 0)
+ return;
+
+ sdelayNodeVec.reserve(ndelays);
+
+ // Use a separate vector to hold the feasible multi-cycle nodes.
+ // These will be used if not enough single-cycle nodes are found.
+ //
+ vector<SchedGraphNode*> mdelayNodeVec;
+
+ for (sg_pred_iterator P = pred_begin(brNode);
+ P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
+ if (! (*P)->isDummyNode() &&
+ ! mii.isNop((*P)->getMachineInstr()->getOpCode()) &&
+ NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
+ {
+ if (mii.maxLatency((*P)->getMachineInstr()->getOpCode()) > 1)
+ mdelayNodeVec.push_back(*P);
+ else
+ sdelayNodeVec.push_back(*P);
+ }
+
+ // If not enough single-cycle instructions were found, select the
+ // lowest-latency multi-cycle instructions and use them.
+ // Note that this is the most efficient code when only 1 (or even 2)
+ // values need to be selected.
+ //
+ while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0)
+ {
+ unsigned lmin =
+ mii.maxLatency(mdelayNodeVec[0]->getMachineInstr()->getOpCode());
+ unsigned minIndex = 0;
+ for (unsigned i=1; i < mdelayNodeVec.size(); i++)
+ {
+ unsigned li =
+ mii.maxLatency(mdelayNodeVec[i]->getMachineInstr()->getOpCode());
+ if (lmin >= li)
+ {
+ lmin = li;
+ minIndex = i;
+ }
+ }
+ sdelayNodeVec.push_back(mdelayNodeVec[minIndex]);
+ if (sdelayNodeVec.size() < ndelays) // avoid the last erase!
+ mdelayNodeVec.erase(mdelayNodeVec.begin() + minIndex);
+ }
+}
+
+
+// Remove the NOPs currently in delay slots from the graph.
+// Mark instructions specified in sdelayNodeVec to replace them.
+// If not enough useful instructions were found, mark the NOPs to be used
+// for filling delay slots, otherwise, otherwise just discard them.
+//
+void
+ReplaceNopsWithUsefulInstr(SchedulingManager& S,
+ SchedGraphNode* node,
+ vector<SchedGraphNode*> sdelayNodeVec,
+ SchedGraph* graph)
+{
+ vector<SchedGraphNode*> nopNodeVec;
+ const MachineInstrInfo& mii = S.getInstrInfo();
+ unsigned ndelays= mii.getNumDelaySlots(node->getMachineInstr()->getOpCode());
+ assert(ndelays > 0 && "Unnecessary call to replace NOPs");
+
+ // Remove the NOPs currently in delay slots from the graph.
+ // If not enough useful instructions were found, use the NOPs to
+ // fill delay slots, otherwise, just discard them.
+ for (sg_succ_iterator I=succ_begin(node); I != succ_end(node); ++I)
+ if (! (*I)->isDummyNode()
+ && mii.isNop((*I)->getMachineInstr()->getOpCode()))
+ {
+ if (sdelayNodeVec.size() < ndelays)
+ sdelayNodeVec.push_back(*I);
+ else
+ nopNodeVec.push_back(*I);
+ }
+ assert(sdelayNodeVec.size() == ndelays);
+
+ // Mark the nodes chosen for delay slots. This removes them from the graph.
+ for (unsigned i=0; i < sdelayNodeVec.size(); i++)
+ MarkNodeForDelaySlot(S, graph, sdelayNodeVec[i], node, true);
+
+ // And remove the unused NOPs from the graph.
+ for (unsigned i=0; i < nopNodeVec.size(); i++)
+ graph->eraseIncidentEdges(nopNodeVec[i], /*addDummyEdges*/ true);
+}
+
+
+// For all delayed instructions, choose instructions to put in the delay
+// slots and pull those out of the graph. Mark them for the delay slots
+// in the DelaySlotInfo object for that graph node. If no useful work
+// is found for a delay slot, use the NOP that is currently in that slot.
+//
+// We try to fill the delay slots with useful work for all instructions
+// except CALLs. For CALLs, it is nearly always possible to use one of the
+// call sequence instrs and putting anything else in the delay slot could be
+// suboptimal.
+//
+static void
+ChooseInstructionsForDelaySlots(SchedulingManager& S,
+ const BasicBlock* bb,
+ SchedGraph* graph)
+{
+ const MachineInstrInfo& mii = S.getInstrInfo();
+ const TerminatorInst* termInstr = bb->getTerminator();
+ MachineCodeForVMInstr& termMvec = termInstr->getMachineInstrVec();
+ vector<SchedGraphNode*> delayNodeVec;
+ const MachineInstr* brInstr;
+
+ assert(termInstr->getOpcode() != Instruction::Call
+ && "Call used as terminator?");
+
+ // To find instructions that need delay slots without searching the entire
+ // machine code, we assume the only delayed instructions are CALLs or
+ // instructions generated for the terminator inst.
+ // Find the first branch instr in the sequence of machine instrs for term
+ //
+ unsigned first = 0;
+ while (first < termMvec.size() &&
+ ! mii.isBranch(termMvec[first]->getOpCode()))
+ {
+ ++first;
+ }
+ assert(first < termMvec.size() &&
+ "No branch instructions for BR? Ok, but weird! Delete assertion.");
+
+ brInstr = (first < termMvec.size())? termMvec[first] : NULL;
+
+ // Compute a vector of the nodes chosen for delay slots and then
+ // mark delay slots to replace NOPs with these useful instructions.
+ //
+ if (brInstr != NULL)
+ {
+ SchedGraphNode* brNode = graph->getGraphNodeForInstr(brInstr);
+ FindUsefulInstructionsForDelaySlots(S, brNode, delayNodeVec);
+ ReplaceNopsWithUsefulInstr(S, brNode, delayNodeVec, graph);
+ }
+
+ // Also mark delay slots for other delayed instructions to hold NOPs.
+ // Simply passing in an empty delayNodeVec will have this effect.
+ //
+ delayNodeVec.clear();
+ const MachineCodeForBasicBlock& bbMvec = bb->getMachineInstrVec();
+ for (unsigned i=0; i < bbMvec.size(); i++)
+ if (bbMvec[i] != brInstr &&
+ mii.getNumDelaySlots(bbMvec[i]->getOpCode()) > 0)
+ {
+ SchedGraphNode* node = graph->getGraphNodeForInstr(bbMvec[i]);
+ ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph);
+ }
+}
+
+
//
// Schedule the delayed branch and its delay slots
//
-void
+unsigned
DelaySlotInfo::scheduleDelayedNode(SchedulingManager& S)
{
assert(delayedNodeSlotNum < S.nslots && "Illegal slot for branch");
S.scheduleInstr(delayNodeVec[i], nextSlot, nextTime);
break;
}
+
+ return 1 + ndelays;
+}
+
+
+// Check if the instruction would conflict with instructions already
+// chosen for the current cycle
+//
+static inline bool
+ConflictsWithChoices(const SchedulingManager& S,
+ MachineOpCode opCode)
+{
+ // Check if the instruction must issue by itself, and some feasible
+ // choices have already been made for this cycle
+ if (S.getNumChoices() > 0 && S.schedInfo.isSingleIssue(opCode))
+ return true;
+
+ // For each class that opCode belongs to, check if there are too many
+ // instructions of that class.
+ //
+ const InstrSchedClass sc = S.schedInfo.getSchedClass(opCode);
+ return (S.getNumChoicesInClass(sc) == S.schedInfo.getMaxIssueForClass(sc));
}
+
+//************************* External Functions *****************************/
+
+
+//---------------------------------------------------------------------------
+// Function: ViolatesMinimumGap
+//
+// Purpose:
+// Check minimum gap requirements relative to instructions scheduled in
+// previous cycles.
+// Note that we do not need to consider `nextEarliestIssueTime' here because
+// that is also captured in the earliest start times for each opcode.
+//---------------------------------------------------------------------------
+
+static inline bool
+ViolatesMinimumGap(const SchedulingManager& S,
+ MachineOpCode opCode,
+ const cycles_t inCycle)
+{
+ return (inCycle < S.getEarliestStartTimeForOp(opCode));
+}
+
+
+//---------------------------------------------------------------------------
+// Function: instrIsFeasible
+//
+// Purpose:
+// Check if any issue restrictions would prevent the instruction from
+// being issued in the current cycle
+//---------------------------------------------------------------------------
+
+bool
+instrIsFeasible(const SchedulingManager& S,
+ MachineOpCode opCode)
+{
+ // skip the instruction if it cannot be issued due to issue restrictions
+ // caused by previously issued instructions
+ if (ViolatesMinimumGap(S, opCode, S.getTime()))
+ return false;
+
+ // skip the instruction if it cannot be issued due to issue restrictions
+ // caused by previously chosen instructions for the current cycle
+ if (ConflictsWithChoices(S, opCode))
+ return false;
+
+ return true;
+}
+
+//---------------------------------------------------------------------------
+// Function: ScheduleInstructionsWithSSA
+//
+// Purpose:
+// Entry point for instruction scheduling on SSA form.
+// Schedules the machine instructions generated by instruction selection.
+// Assumes that register allocation has not been done, i.e., operands
+// are still in SSA form.
+//---------------------------------------------------------------------------
+
+bool
+ScheduleInstructionsWithSSA(Method* method,
+ const TargetMachine &target)
+{
+ SchedGraphSet graphSet(method, target);
+
+ if (SchedDebugLevel >= Sched_PrintSchedGraphs)
+ {
+ cout << endl << "*** SCHEDULING GRAPHS FOR INSTRUCTION SCHEDULING"
+ << endl;
+ graphSet.dump();
+ }
+
+ for (SchedGraphSet::const_iterator GI=graphSet.begin();
+ GI != graphSet.end(); ++GI)
+ {
+ SchedGraph* graph = (*GI).second;
+ const vector<const BasicBlock*>& bbvec = graph->getBasicBlocks();
+ assert(bbvec.size() == 1 && "Cannot schedule multiple basic blocks");
+ const BasicBlock* bb = bbvec[0];
+
+ if (SchedDebugLevel >= Sched_PrintSchedTrace)
+ cout << endl << "*** TRACE OF INSTRUCTION SCHEDULING OPERATIONS\n\n";
+
+ SchedPriorities schedPrio(method, graph); // expensive!
+ SchedulingManager S(target, graph, schedPrio);
+
+ ChooseInstructionsForDelaySlots(S, bb, graph); // modifies graph
+
+ ForwardListSchedule(S); // computes schedule in S
+
+ RecordSchedule((*GI).first, S); // records schedule in BB
+ }
+
+ if (SchedDebugLevel >= Sched_PrintMachineCode)
+ {
+ cout << endl
+ << "*** Machine instructions after INSTRUCTION SCHEDULING" << endl;
+ PrintMachineInstructions(method);
+ }
+
+ return false; // no reason to fail yet
+}
+
+