//
// The LLVM Compiler Infrastructure
//
-// This file was developed by Evan Cheng and is distributed under the
-// University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
continue;
SDNode *OpN = SU->Node->getOperand(j).Val;
- SUnit *OpSU = SUnitMap[OpN][SU->InstanceNo];
+ SUnit *OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo];
if (OpSU && OperandSeen.count(OpSU) == 1) {
// Ok, so SU is not the last use of OpSU, but SU is two-address so
// it will clobber OpSU. Try to commute SU if no other source operands
for (unsigned k = 0; k < NumOps; ++k) {
if (k != j) {
OpN = SU->Node->getOperand(k).Val;
- OpSU = SUnitMap[OpN][SU->InstanceNo];
+ OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo];
if (OpSU && OperandSeen.count(OpSU) == 1) {
DoCommute = false;
break;
return NULL;
SUnit *NewSU;
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
- if (N->getValueType(i) == MVT::Flag)
- return NULL;
bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT::ValueType VT = N->getValueType(i);
+ if (VT == MVT::Flag)
+ return NULL;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDOperand &Op = N->getOperand(i);
MVT::ValueType VT = Op.Val->getValueType(Op.ResNo);
if (VT == MVT::Flag)
return NULL;
- else if (VT == MVT::Other)
- TryUnfold = true;
}
if (TryUnfold) {
N = NewNodes[1];
SDNode *LoadNode = NewNodes[0];
- std::vector<SDNode*> Deleted;
unsigned NumVals = N->getNumValues();
unsigned OldNumVals = SU->Node->getNumValues();
for (unsigned i = 0; i != NumVals; ++i)
- DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, i),
- SDOperand(N, i), Deleted);
+ DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, i), SDOperand(N, i));
DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, OldNumVals-1),
- SDOperand(LoadNode, 1), Deleted);
+ SDOperand(LoadNode, 1));
- SUnit *LoadSU = NewSUnit(LoadNode);
SUnit *NewSU = NewSUnit(N);
- SUnitMap[LoadNode].push_back(LoadSU);
SUnitMap[N].push_back(NewSU);
- const TargetInstrDescriptor *TID = &TII->get(LoadNode->getTargetOpcode());
- for (unsigned i = 0; i != TID->numOperands; ++i) {
- if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) {
- LoadSU->isTwoAddress = true;
- break;
- }
- }
- if (TID->Flags & M_COMMUTABLE)
- LoadSU->isCommutable = true;
-
- TID = &TII->get(N->getTargetOpcode());
+ const TargetInstrDescriptor *TID = &TII->get(N->getTargetOpcode());
for (unsigned i = 0; i != TID->numOperands; ++i) {
if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) {
NewSU->isTwoAddress = true;
}
if (TID->Flags & M_COMMUTABLE)
NewSU->isCommutable = true;
-
// FIXME: Calculate height / depth and propagate the changes?
- LoadSU->Depth = NewSU->Depth = SU->Depth;
- LoadSU->Height = NewSU->Height = SU->Height;
- ComputeLatency(LoadSU);
+ NewSU->Depth = SU->Depth;
+ NewSU->Height = SU->Height;
ComputeLatency(NewSU);
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location and producing the same type of value
+    // but it has different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ DenseMap<SDNode*, std::vector<SUnit*> >::iterator SMI =
+ SUnitMap.find(LoadNode);
+ if (SMI != SUnitMap.end()) {
+ LoadSU = SMI->second.front();
+ isNewLoad = false;
+ } else {
+ LoadSU = NewSUnit(LoadNode);
+ SUnitMap[LoadNode].push_back(LoadSU);
+
+ LoadSU->Depth = SU->Depth;
+ LoadSU->Height = SU->Height;
+ ComputeLatency(LoadSU);
+ }
+
SUnit *ChainPred = NULL;
SmallVector<SDep, 4> ChainSuccs;
SmallVector<SDep, 4> LoadPreds;
}
SU->removePred(ChainPred, true, false);
- LoadSU->addPred(ChainPred, true, false);
+ if (isNewLoad)
+ LoadSU->addPred(ChainPred, true, false);
for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
SDep *Pred = &LoadPreds[i];
SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial);
- LoadSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial,
- Pred->Reg, Pred->Cost);
+ if (isNewLoad)
+ LoadSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial,
+ Pred->Reg, Pred->Cost);
}
for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
SDep *Pred = &NodePreds[i];
for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
SDep *Succ = &ChainSuccs[i];
Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial);
- Succ->Dep->addPred(LoadSU, Succ->isCtrl, Succ->isSpecial,
- Succ->Reg, Succ->Cost);
+ if (isNewLoad)
+ Succ->Dep->addPred(LoadSU, Succ->isCtrl, Succ->isSpecial,
+ Succ->Reg, Succ->Cost);
}
- NewSU->addPred(LoadSU, false, false);
+ if (isNewLoad)
+ NewSU->addPred(LoadSU, false, false);
- AvailableQueue->addNode(LoadSU);
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
AvailableQueue->addNode(NewSU);
++NumUnfolds;
if (NewSU->NumSuccsLeft == 0) {
NewSU->isAvailable = true;
return NewSU;
- } else
- SU = NewSU;
+ }
+ SU = NewSU;
}
DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC,
SmallVector<SUnit*, 2> &Copies) {
- abort();
SUnit *CopyFromSU = NewSUnit(NULL);
CopyFromSU->CopySrcRC = SrcRC;
CopyFromSU->CopyDstRC = DestRC;
std::vector<unsigned> SethiUllmanNumbers;
const TargetInstrInfo *TII;
+ const MRegisterInfo *MRI;
public:
- explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii)
- : TII(tii) {}
+ explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii,
+ const MRegisterInfo *mri)
+ : TII(tii), MRI(mri) {}
void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
std::vector<SUnit> &sunits) {
// CopyToReg should be close to its uses to facilitate coalescing and
// avoid spilling.
return 0;
+ else if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+ Opc == TargetInstrInfo::INSERT_SUBREG)
+ // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to
+ // facilitate coalescing.
+ return 0;
else if (SU->NumSuccs == 0)
// If SU does not have a use, i.e. it doesn't produce a value that would
// be consumed (e.g. store), then it terminates a chain of computation.
return MaxCycle;
}
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers. Live-in operands and live-out results don't count
+/// since they are "fixed".
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl) continue; // ignore chain preds
+ if (I->Dep->Node->getOpcode() != ISD::CopyFromReg)
+ Scratches++;
+ }
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl) continue; // ignore chain succs
+ if (I->Dep->Node->getOpcode() != ISD::CopyToReg)
+ Scratches += 10;
+ }
+ return Scratches;
+}
+
// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
// There used to be a special tie breaker here that looked for
if (LDist < RDist)
return true;
else if (LDist == RDist) {
- if (left->Height > right->Height)
+ // Intuitively, it's good to push down instructions whose results are
+ // liveout so their long live ranges won't conflict with other values
+ // which are needed inside the BB. Further prioritize liveout instructions
+ // by the number of operands which are calculated within the BB.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch > RScratch)
return true;
- else if (left->Height == right->Height)
- if (left->Depth < right->Depth)
+ else if (LScratch == RScratch)
+ if (left->Height > right->Height)
return true;
- else if (left->Depth == right->Depth)
- if (left->CycleBound > right->CycleBound)
+ else if (left->Height == right->Height)
+ if (left->Depth < right->Depth)
return true;
+ else if (left->Depth == right->Depth)
+ if (left->CycleBound > right->CycleBound)
+ return true;
}
}
return false;
for (unsigned i = 0; i != NumOps; ++i) {
if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
SDNode *DU = SU->Node->getOperand(i).Val;
- if (Op == (*SUnitMap)[DU][SU->InstanceNo])
+ if ((*SUnitMap).find(DU) != (*SUnitMap).end() &&
+ Op == (*SUnitMap)[DU][SU->InstanceNo])
return true;
}
}
return false;
}
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register def.
+static bool canClobberPhysRegDefs(SUnit *SuccSU, SUnit *SU,
+ const TargetInstrInfo *TII,
+ const MRegisterInfo *MRI) {
+ SDNode *N = SuccSU->Node;
+ unsigned NumDefs = TII->getNumDefs(N->getTargetOpcode());
+ const unsigned *ImpDefs = TII->getImplicitDefs(N->getTargetOpcode());
+ if (!ImpDefs)
+ return false;
+ const unsigned *SUImpDefs = TII->getImplicitDefs(SU->Node->getTargetOpcode());
+ if (!SUImpDefs)
+ return false;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ MVT::ValueType VT = N->getValueType(i);
+ if (VT == MVT::Flag || VT == MVT::Other)
+ continue;
+ unsigned Reg = ImpDefs[i - NumDefs];
+ for (;*SUImpDefs; ++SUImpDefs) {
+ unsigned SUReg = *SUImpDefs;
+ if (MRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ return false;
+}
+
/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand. Add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
for (unsigned j = 0; j != NumOps; ++j) {
if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
SDNode *DU = SU->Node->getOperand(j).Val;
+ if ((*SUnitMap).find(DU) == (*SUnitMap).end())
+ continue;
SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo];
if (!DUSU) continue;
for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
I != E; ++I) {
if (I->isCtrl) continue;
SUnit *SuccSU = I->Dep;
- // Don't constraint nodes with implicit defs. It can create cycles
- // plus it may increase register pressures.
- if (SuccSU == SU || SuccSU->hasPhysRegDefs)
+ if (SuccSU == SU)
+ continue;
+ // Be conservative. Ignore if nodes aren't at roughly the same
+ // depth and height.
+ if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1)
continue;
- // Be conservative. Ignore if nodes aren't at the same depth.
- if (SuccSU->Depth != SU->Depth)
+ if (!SuccSU->Node || !SuccSU->Node->isTargetOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+    // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs) {
+ if (canClobberPhysRegDefs(SuccSU, SU, TII, MRI))
+ continue;
+ }
+      // Don't constrain extract_subreg / insert_subreg; these may be
+      // coalesced away. We don't want them close to their uses.
+ unsigned SuccOpc = SuccSU->Node->getTargetOpcode();
+ if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
+ SuccOpc == TargetInstrInfo::INSERT_SUBREG)
continue;
if ((!canClobber(SuccSU, DUSU) ||
(hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
SelectionDAG *DAG,
MachineBasicBlock *BB) {
const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo();
+ const MRegisterInfo *MRI = DAG->getTarget().getRegisterInfo();
return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true,
- new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII));
+ new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII, MRI));
}
llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,