//===- InstrScheduling.cpp - Generic Instruction Scheduling support -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
//
// This file implements the llvm/CodeGen/InstrScheduling.h interface, along with
// generic support routines for instruction scheduling.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineCodeForInstruction.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/FunctionLiveVarInfo.h"
+#include "../../Target/SparcV9/LiveVar/FunctionLiveVarInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/BasicBlock.h"
#include "Support/CommandLine.h"
#include <algorithm>
+#include <iostream>
+
+namespace llvm {
SchedDebugLevel_t SchedDebugLevel;
+static cl::opt<bool> EnableFillingDelaySlots("sched-fill-delay-slots",
+ cl::desc("Fill branch delay slots during local scheduling"));
+
static cl::opt<SchedDebugLevel_t, true>
SDL_opt("dsched", cl::Hidden, cl::location(SchedDebugLevel),
cl::desc("enable instruction scheduling debugging information"),
clEnumValN(Sched_PrintMachineCode, "y", "print machine code after scheduling"),
clEnumValN(Sched_PrintSchedTrace, "t", "print trace of scheduling actions"),
clEnumValN(Sched_PrintSchedGraphs, "g", "print scheduling graphs"),
- 0));
+ clEnumValEnd));
//************************* Internal Data Types *****************************/
inline bool operator!=(const _Self& x) const { return !operator==(x); }
- inline _NodeType* operator*() const {
- assert(cycleNum < S.groups.size());
- return (*S.groups[cycleNum])[slotNum];
- }
+ inline _NodeType* operator*() const;
inline _NodeType* operator->() const { return operator*(); }
_Self& operator++(); // Preincrement
typedef ScheduleIterator<SchedGraphNode> iterator;
typedef ScheduleIterator<const SchedGraphNode> const_iterator;
- iterator begin();
- const_iterator begin() const;
- iterator end();
- const_iterator end() const;
+ iterator begin() { return iterator::begin(*this); }
+ const_iterator begin() const { return const_iterator::begin(*this); }
+ iterator end() { return iterator::end(*this); }
+ const_iterator end() const { return const_iterator::end(*this); }
public: // constructors and destructor
/*ctor*/ InstrSchedule (unsigned int _nslots,
unsigned int slotNum,
cycles_t cycle) {
InstrGroup* igroup = this->getIGroup(cycle);
- assert((*igroup)[slotNum] == NULL && "Slot already filled?");
+ if (!((*igroup)[slotNum] == NULL)) {
+ std::cerr << "Slot already filled?\n";
+ abort();
+ }
igroup->addInstr(node, slotNum);
assert(node->getNodeId() < startTime.size());
startTime[node->getNodeId()] = cycle;
}
private:
- friend class iterator;
- friend class const_iterator;
+ friend class ScheduleIterator<SchedGraphNode>;
+ friend class ScheduleIterator<const SchedGraphNode>;
/*ctor*/ InstrSchedule (); // Disable: DO NOT IMPLEMENT.
};
+template<class NodeType>
+inline NodeType *ScheduleIterator<NodeType>::operator*() const {
+ assert(cycleNum < S.groups.size());
+ return (*S.groups[cycleNum])[slotNum];
+}
+
/*ctor*/
InstrSchedule::InstrSchedule(unsigned int _nslots, unsigned int _numNodes)
return _Self(_schedule, _schedule.groups.size(), 0);
}
-InstrSchedule::iterator
-InstrSchedule::begin()
-{
- return iterator::begin(*this);
-}
-
-InstrSchedule::const_iterator
-InstrSchedule::begin() const
-{
- return const_iterator::begin(*this);
-}
-
-InstrSchedule::iterator
-InstrSchedule::end()
-{
- return iterator::end(*this);
-}
-
-InstrSchedule::const_iterator
-InstrSchedule::end() const
-{
- return const_iterator::end( *this);
-}
-
//----------------------------------------------------------------------
// class DelaySlotInfo:
// Append the instruction to the vector of choices for current cycle.
// Increment numInClass[c] for the sched class to which the instr belongs.
choiceVec.push_back(node);
- const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpCode());
+ const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpcode());
assert(sc < numInClass.size());
numInClass[sc]++;
}
choicesForSlot[s].erase(node);
// and decrement the instr count for the sched class to which it belongs
- const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpCode());
+ const InstrSchedClass& sc = schedInfo.getSchedClass(node->getOpcode());
assert(sc < numInClass.size());
numInClass[sc]--;
}
if (!createIfMissing) return 0;
DelaySlotInfo *dinfo =
- new DelaySlotInfo(bn, getInstrInfo().getNumDelaySlots(bn->getOpCode()));
+ new DelaySlotInfo(bn, getInstrInfo().getNumDelaySlots(bn->getOpcode()));
return delaySlotInfoForBranches[bn] = dinfo;
}
SchedulingManager::SchedulingManager(const TargetMachine& target,
const SchedGraph* graph,
SchedPriorities& _schedPrio)
- : nslots(target.getSchedInfo().getMaxNumIssueTotal()),
- schedInfo(target.getSchedInfo()),
+ : nslots(target.getSchedInfo()->getMaxNumIssueTotal()),
+ schedInfo(*target.getSchedInfo()),
schedPrio(_schedPrio),
isched(nslots, graph->getNumNodes()),
totalInstrCount(graph->getNumNodes() - 2),
nextEarliestIssueTime(0),
choicesForSlot(nslots),
- numInClass(target.getSchedInfo().getNumSchedClasses(), 0), // set all to 0
- nextEarliestStartTime(target.getInstrInfo().getNumRealOpCodes(),
+ numInClass(target.getSchedInfo()->getNumSchedClasses(), 0), // set all to 0
+ nextEarliestStartTime(target.getInstrInfo()->getNumOpcodes(),
(cycles_t) 0) // set all to 0
{
updateTime(0);
SchedulingManager::updateEarliestStartTimes(const SchedGraphNode* node,
cycles_t schedTime)
{
- if (schedInfo.numBubblesAfter(node->getOpCode()) > 0)
+ if (schedInfo.numBubblesAfter(node->getOpcode()) > 0)
{ // Update next earliest time before which *nothing* can issue.
nextEarliestIssueTime = std::max(nextEarliestIssueTime,
- curTime + 1 + schedInfo.numBubblesAfter(node->getOpCode()));
+ curTime + 1 + schedInfo.numBubblesAfter(node->getOpcode()));
}
const std::vector<MachineOpCode>&
- conflictVec = schedInfo.getConflictList(node->getOpCode());
+ conflictVec = schedInfo.getConflictList(node->getOpcode());
for (unsigned i=0; i < conflictVec.size(); i++)
{
MachineOpCode toOp = conflictVec[i];
- cycles_t est=schedTime + schedInfo.getMinIssueGap(node->getOpCode(),toOp);
+ cycles_t est=schedTime + schedInfo.getMinIssueGap(node->getOpcode(),toOp);
assert(toOp < (int) nextEarliestStartTime.size());
if (nextEarliestStartTime[toOp] < est)
nextEarliestStartTime[toOp] = est;
{
const TargetInstrInfo& mii = S.schedInfo.getInstrInfo();
-#ifndef NDEBUG
// Lets make sure we didn't lose any instructions, except possibly
// some NOPs from delay slots. Also, PHIs are not included in the schedule.
unsigned numInstr = 0;
for (MachineBasicBlock::iterator I=MBB.begin(); I != MBB.end(); ++I)
- if (! mii.isNop((*I)->getOpCode()) &&
- ! mii.isDummyPhiInstr((*I)->getOpCode()))
+ if (! mii.isNop(I->getOpcode()) &&
+ ! mii.isDummyPhiInstr(I->getOpcode()))
++numInstr;
assert(S.isched.getNumInstructions() >= numInstr &&
"Lost some non-NOP instructions during scheduling!");
-#endif
if (S.isched.getNumInstructions() == 0)
return; // empty basic block!
// First find the dummy instructions at the start of the basic block
MachineBasicBlock::iterator I = MBB.begin();
for ( ; I != MBB.end(); ++I)
- if (! mii.isDummyPhiInstr((*I)->getOpCode()))
+ if (! mii.isDummyPhiInstr(I->getOpcode()))
break;
- // Erase all except the dummy PHI instructions from MBB, and
+ // Remove all except the dummy PHI instructions from MBB, and
// pre-allocate create space for the ones we will put back in.
- MBB.erase(I, MBB.end());
+ while (I != MBB.end())
+ MBB.remove(I++);
InstrSchedule::const_iterator NIend = S.isched.end();
for (InstrSchedule::const_iterator NI = S.isched.begin(); NI != NIend; ++NI)
if (nextNode == NULL)
break; // no more instructions for this cycle
- if (S.getInstrInfo().getNumDelaySlots(nextNode->getOpCode()) > 0) {
+ if (S.getInstrInfo().getNumDelaySlots(nextNode->getOpcode()) > 0) {
delaySlotInfo = S.getDelaySlotInfoForInstr(nextNode);
if (delaySlotInfo != NULL) {
if (indexForBreakingNode < S.nslots)
else
indexForDelayedInstr = S.getNumChoices();
}
- } else if (S.schedInfo.breaksIssueGroup(nextNode->getOpCode())) {
+ } else if (S.schedInfo.breaksIssueGroup(nextNode->getOpcode())) {
if (indexForBreakingNode < S.nslots)
// have a breaking instruction already so throw this one away
nextNode = NULL;
if (nextNode != NULL) {
S.addChoice(nextNode);
- if (S.schedInfo.isSingleIssue(nextNode->getOpCode())) {
+ if (S.schedInfo.isSingleIssue(nextNode->getOpcode())) {
assert(S.getNumChoices() == 1 &&
"Prioritizer returned invalid instr for this cycle!");
break;
// This is the common case, so handle it separately for efficiency.
if (S.getNumChoices() == 1) {
- MachineOpCode opCode = S.getChoice(0)->getOpCode();
+ MachineOpCode opCode = S.getChoice(0)->getOpcode();
unsigned int s;
for (s=startSlot; s < S.nslots; s++)
if (S.schedInfo.instrCanUseSlot(opCode, s))
S.addChoiceToSlot(s, S.getChoice(0));
} else {
for (unsigned i=0; i < S.getNumChoices(); i++) {
- MachineOpCode opCode = S.getChoice(i)->getOpCode();
+ MachineOpCode opCode = S.getChoice(i)->getOpcode();
for (unsigned int s=startSlot; s < S.nslots; s++)
if (S.schedInfo.instrCanUseSlot(opCode, s))
S.addChoiceToSlot(s, S.getChoice(i));
assert(delaySlotInfo != NULL && "No delay slot info for instr?");
const SchedGraphNode* delayedNode = S.getChoice(indexForDelayedInstr);
- MachineOpCode delayOpCode = delayedNode->getOpCode();
+ MachineOpCode delayOpCode = delayedNode->getOpcode();
unsigned ndelays= S.getInstrInfo().getNumDelaySlots(delayOpCode);
unsigned delayedNodeSlot = S.nslots;
for (unsigned i=0; i < S.getNumChoices() - 1; i++) {
// Try to assign every other instruction to a lower numbered
// slot than delayedNodeSlot.
- MachineOpCode opCode =S.getChoice(i)->getOpCode();
+ MachineOpCode opCode =S.getChoice(i)->getOpcode();
bool noSlotFound = true;
unsigned int s;
for (s=startSlot; s < delayedNodeSlot; s++)
// Find the last possible slot for this instruction.
for (int s = S.nslots-1; s >= (int) startSlot; s--)
- if (S.schedInfo.instrCanUseSlot(breakingNode->getOpCode(), s)) {
+ if (S.schedInfo.instrCanUseSlot(breakingNode->getOpcode(), s)) {
breakingSlot = s;
break;
}
for (unsigned i=0;
i < S.getNumChoices() && i < indexForBreakingNode; i++)
{
- MachineOpCode opCode =S.getChoice(i)->getOpCode();
+ MachineOpCode opCode =S.getChoice(i)->getOpcode();
// If a higher priority instruction cannot be assigned to
// any earlier slots, don't schedule the breaking instruction.
// group, only assign them to slots lower than the breaking slot.
// Otherwise, just ignore the instruction.
for (unsigned i=indexForBreakingNode+1; i < S.getNumChoices(); i++) {
- MachineOpCode opCode = S.getChoice(i)->getOpCode();
+ MachineOpCode opCode = S.getChoice(i)->getOpcode();
for (unsigned int s=startSlot; s < nslotsToUse; s++)
if (S.schedInfo.instrCanUseSlot(opCode, s))
S.addChoiceToSlot(s, S.getChoice(i));
assert(! node->isDummyNode());
// don't put a branch in the delay slot of another branch
- if (S.getInstrInfo().isBranch(node->getOpCode()))
+ if (S.getInstrInfo().isBranch(node->getOpcode()))
return false;
// don't put a single-issue instruction in the delay slot of a branch
- if (S.schedInfo.isSingleIssue(node->getOpCode()))
+ if (S.schedInfo.isSingleIssue(node->getOpcode()))
return false;
// don't put a load-use dependence in the delay slot of a branch
for (SchedGraphNode::const_iterator EI = node->beginInEdges();
EI != node->endInEdges(); ++EI)
- if (! (*EI)->getSrc()->isDummyNode()
- && mii.isLoad((*EI)->getSrc()->getOpCode())
+ if (! ((SchedGraphNode*)(*EI)->getSrc())->isDummyNode()
+ && mii.isLoad(((SchedGraphNode*)(*EI)->getSrc())->getOpcode())
&& (*EI)->getDepType() == SchedGraphEdge::CtrlDep)
return false;
- // for now, don't put an instruction that does not have operand
- // interlocks in the delay slot of a branch
- if (! S.getInstrInfo().hasOperandInterlock(node->getOpCode()))
- return false;
-
- // Finally, if the instruction preceeds the branch, we make sure the
+ // Finally, if the instruction precedes the branch, we make sure the
// instruction can be reordered relative to the branch. We simply check
// if the instr. has only 1 outgoing edge, viz., a CD edge to the branch.
//
bool onlyCDEdgeToBranch = true;
for (SchedGraphNode::const_iterator OEI = node->beginOutEdges();
OEI != node->endOutEdges(); ++OEI)
- if (! (*OEI)->getSink()->isDummyNode()
+ if (! ((SchedGraphNode*)(*OEI)->getSink())->isDummyNode()
&& ((*OEI)->getSink() != brNode
|| (*OEI)->getDepType() != SchedGraphEdge::CtrlDep))
{
bool nodeIsPredecessor)
{
if (nodeIsPredecessor) {
- // If node is in the same basic block (i.e., preceeds brNode),
+ // If node is in the same basic block (i.e., precedes brNode),
// remove it and all its incident edges from the graph. Make sure we
// add dummy edges for pred/succ nodes that become entry/exit nodes.
graph->eraseIncidentEdges(node, /*addDummyEdges*/ true);
{
const TargetInstrInfo& mii = S.getInstrInfo();
unsigned ndelays =
- mii.getNumDelaySlots(brNode->getOpCode());
+ mii.getNumDelaySlots(brNode->getOpcode());
if (ndelays == 0)
return;
for (sg_pred_iterator P = pred_begin(brNode);
P != pred_end(brNode) && sdelayNodeVec.size() < ndelays; ++P)
if (! (*P)->isDummyNode() &&
- ! mii.isNop((*P)->getOpCode()) &&
+ ! mii.isNop((*P)->getOpcode()) &&
NodeCanFillDelaySlot(S, *P, brNode, /*pred*/ true))
{
- if (mii.maxLatency((*P)->getOpCode()) > 1)
+ if (mii.maxLatency((*P)->getOpcode()) > 1)
mdelayNodeVec.push_back(*P);
else
sdelayNodeVec.push_back(*P);
//
while (sdelayNodeVec.size() < ndelays && mdelayNodeVec.size() > 0) {
unsigned lmin =
- mii.maxLatency(mdelayNodeVec[0]->getOpCode());
+ mii.maxLatency(mdelayNodeVec[0]->getOpcode());
unsigned minIndex = 0;
for (unsigned i=1; i < mdelayNodeVec.size(); i++)
{
unsigned li =
- mii.maxLatency(mdelayNodeVec[i]->getOpCode());
+ mii.maxLatency(mdelayNodeVec[i]->getOpcode());
if (lmin >= li)
{
lmin = li;
std::vector<SchedGraphNode*> nopNodeVec; // this will hold unused NOPs
const TargetInstrInfo& mii = S.getInstrInfo();
const MachineInstr* brInstr = node->getMachineInstr();
- unsigned ndelays= mii.getNumDelaySlots(brInstr->getOpCode());
+ unsigned ndelays= mii.getNumDelaySlots(brInstr->getOpcode());
assert(ndelays > 0 && "Unnecessary call to replace NOPs");
// Remove the NOPs currently in delay slots from the graph.
//
unsigned int firstDelaySlotIdx = node->getOrigIndexInBB() + 1;
MachineBasicBlock& MBB = node->getMachineBasicBlock();
- assert(MBB[firstDelaySlotIdx - 1] == brInstr &&
- "Incorrect instr. index in basic block for brInstr");
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ std::advance(MBBI, firstDelaySlotIdx - 1);
+ if (!(&*MBBI++ == brInstr)) {
+ std::cerr << "Incorrect instr. index in basic block for brInstr";
+ abort();
+ }
// First find all useful instructions already in the delay slots
// and USE THEM. We'll throw away the unused alternatives below
//
- for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx + ndelays; ++i)
- if (! mii.isNop(MBB[i]->getOpCode()))
+ MachineBasicBlock::iterator Tmp = MBBI;
+ for (unsigned i = 0; i != ndelays; ++i, ++MBBI)
+ if (!mii.isNop(MBBI->getOpcode()))
sdelayNodeVec.insert(sdelayNodeVec.begin(),
- graph->getGraphNodeForInstr(MBB[i]));
-
+ graph->getGraphNodeForInstr(MBBI));
+ MBBI = Tmp;
+
// Then find the NOPs and keep only as many as are needed.
// Put the rest in nopNodeVec to be deleted.
- for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx + ndelays; ++i)
- if (mii.isNop(MBB[i]->getOpCode()))
+ for (unsigned i=firstDelaySlotIdx; i < firstDelaySlotIdx+ndelays; ++i, ++MBBI)
+ if (mii.isNop(MBBI->getOpcode()))
if (sdelayNodeVec.size() < ndelays)
- sdelayNodeVec.push_back(graph->getGraphNodeForInstr(MBB[i]));
+ sdelayNodeVec.push_back(graph->getGraphNodeForInstr(MBBI));
else {
- nopNodeVec.push_back(graph->getGraphNodeForInstr(MBB[i]));
+ nopNodeVec.push_back(graph->getGraphNodeForInstr(MBBI));
//remove the MI from the Machine Code For Instruction
const TerminatorInst *TI = MBB.getBasicBlock()->getTerminator();
for(MachineCodeForInstruction::iterator mciI=llvmMvec.begin(),
mciE=llvmMvec.end(); mciI!=mciE; ++mciI){
- if (*mciI==MBB[i])
+ if (*mciI == MBBI)
llvmMvec.erase(mciI);
}
}
std::vector<SchedGraphNode*> delayNodeVec;
const MachineInstr* brInstr = NULL;
- if (termInstr->getOpcode() != Instruction::Ret)
+ if (EnableFillingDelaySlots &&
+ termInstr->getOpcode() != Instruction::Ret)
{
// To find instructions that need delay slots without searching the full
// machine code, we assume that the only delayed instructions are CALLs
//
unsigned first = 0;
while (first < termMvec.size() &&
- ! mii.isBranch(termMvec[first]->getOpCode()))
+ ! mii.isBranch(termMvec[first]->getOpcode()))
{
++first;
}
// Also mark delay slots for other delayed instructions to hold NOPs.
// Simply passing in an empty delayNodeVec will have this effect.
+ // If brInstr is not handled above (EnableFillingDelaySlots == false),
+ // brInstr will be NULL so this will handle the branch instrs. as well.
//
delayNodeVec.clear();
- for (unsigned i=0; i < MBB.size(); ++i)
- if (MBB[i] != brInstr &&
- mii.getNumDelaySlots(MBB[i]->getOpCode()) > 0)
- {
- SchedGraphNode* node = graph->getGraphNodeForInstr(MBB[i]);
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I)
+ if (I != brInstr && mii.getNumDelaySlots(I->getOpcode()) > 0) {
+ SchedGraphNode* node = graph->getGraphNodeForInstr(I);
ReplaceNopsWithUsefulInstr(S, node, delayNodeVec, graph);
}
}
for (unsigned i=0; i < delayNodeVec.size(); i++) {
const SchedGraphNode* dnode = delayNodeVec[i];
if ( ! S.isScheduled(dnode)
- && S.schedInfo.instrCanUseSlot(dnode->getOpCode(), nextSlot)
- && instrIsFeasible(S, dnode->getOpCode()))
- {
- assert(S.getInstrInfo().hasOperandInterlock(dnode->getOpCode())
- && "Instructions without interlocks not yet supported "
- "when filling branch delay slots");
+ && S.schedInfo.instrCanUseSlot(dnode->getOpcode(), nextSlot)
+ && instrIsFeasible(S, dnode->getOpcode())) {
S.scheduleInstr(dnode, nextSlot, nextTime);
break;
}
FunctionPass *createInstructionSchedulingWithSSAPass(const TargetMachine &tgt) {
return new InstructionSchedulingWithSSA(tgt);
}
+
+} // End llvm namespace
+