X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FScheduleDAGRRList.cpp;h=6ea7c80687820ec878251f7c200c7edaaed91816;hb=4ee451de366474b9c228b4e5fa573795a715216d;hp=0575b41d1f554adbad4a8c93da899b70bf22f833;hpb=f10c973797cf79da802f9b0118543cbd50954c9c;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 0575b41d1f5..6ea7c806878 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -2,8 +2,8 @@ // // The LLVM Compiler Infrastructure // -// This file was developed by Evan Cheng and is distributed under the -// University of Illinois Open Source License. See LICENSE.TXT for details. +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // @@ -156,7 +156,7 @@ void ScheduleDAGRRList::CommuteNodesToReducePressure() { continue; SDNode *OpN = SU->Node->getOperand(j).Val; - SUnit *OpSU = SUnitMap[OpN][SU->InstanceNo]; + SUnit *OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo]; if (OpSU && OperandSeen.count(OpSU) == 1) { // Ok, so SU is not the last use of OpSU, but SU is two-address so // it will clobber OpSU. Try to commute SU if no other source operands @@ -165,7 +165,7 @@ void ScheduleDAGRRList::CommuteNodesToReducePressure() { for (unsigned k = 0; k < NumOps; ++k) { if (k != j) { OpN = SU->Node->getOperand(k).Val; - OpSU = SUnitMap[OpN][SU->InstanceNo]; + OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo]; if (OpSU && OperandSeen.count(OpSU) == 1) { DoCommute = false; break; @@ -397,17 +397,19 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { return NULL; SUnit *NewSU; - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) - if (N->getValueType(i) == MVT::Flag) - return NULL; bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT::ValueType VT = N->getValueType(i); + if (VT == MVT::Flag) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; + } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDOperand &Op = N->getOperand(i); MVT::ValueType VT = Op.Val->getValueType(Op.ResNo); if (VT == MVT::Flag) return NULL; - else if (VT == MVT::Other) - TryUnfold = true; } if (TryUnfold) { @@ -420,30 +422,16 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { N = NewNodes[1]; SDNode *LoadNode = NewNodes[0]; - std::vector Deleted; unsigned NumVals = N->getNumValues(); unsigned OldNumVals = SU->Node->getNumValues(); for (unsigned i = 0; i != NumVals; ++i) - DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, i), - SDOperand(N, i), Deleted); + DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, i), SDOperand(N, i)); DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, OldNumVals-1), - SDOperand(LoadNode, 1), Deleted); + SDOperand(LoadNode, 1)); - SUnit *LoadSU = NewSUnit(LoadNode); SUnit *NewSU = NewSUnit(N); - SUnitMap[LoadNode].push_back(LoadSU); SUnitMap[N].push_back(NewSU); - const TargetInstrDescriptor *TID = &TII->get(LoadNode->getTargetOpcode()); - for (unsigned i = 0; i != TID->numOperands; ++i) { - if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) { - LoadSU->isTwoAddress = true; - break; - } - } - if (TID->Flags & M_COMMUTABLE) - LoadSU->isCommutable = true; - - TID = &TII->get(N->getTargetOpcode()); + const TargetInstrDescriptor *TID = &TII->get(N->getTargetOpcode()); for (unsigned i = 0; i != TID->numOperands; ++i) { if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) { NewSU->isTwoAddress = true; @@ -452,13 +440,30 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } if (TID->Flags & M_COMMUTABLE) NewSU->isCommutable = true; - // FIXME: Calculate height / depth and propagate the changes? - LoadSU->Depth = NewSU->Depth = SU->Depth; - LoadSU->Height = NewSU->Height = SU->Height; - ComputeLatency(LoadSU); + NewSU->Depth = SU->Depth; + NewSU->Height = SU->Height; ComputeLatency(NewSU); + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. + bool isNewLoad = true; + SUnit *LoadSU; + DenseMap >::iterator SMI = + SUnitMap.find(LoadNode); + if (SMI != SUnitMap.end()) { + LoadSU = SMI->second.front(); + isNewLoad = false; + } else { + LoadSU = NewSUnit(LoadNode); + SUnitMap[LoadNode].push_back(LoadSU); + + LoadSU->Depth = SU->Depth; + LoadSU->Height = SU->Height; + ComputeLatency(LoadSU); + } + SUnit *ChainPred = NULL; SmallVector ChainSuccs; SmallVector LoadPreds; @@ -484,12 +489,14 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } SU->removePred(ChainPred, true, false); - LoadSU->addPred(ChainPred, true, false); + if (isNewLoad) + LoadSU->addPred(ChainPred, true, false); for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { SDep *Pred = &LoadPreds[i]; SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial); - LoadSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial, - Pred->Reg, Pred->Cost); + if (isNewLoad) + LoadSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial, + Pred->Reg, Pred->Cost); } for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { SDep *Pred = &NodePreds[i]; @@ -506,12 +513,15 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { SDep *Succ = &ChainSuccs[i]; Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial); - Succ->Dep->addPred(LoadSU, Succ->isCtrl, Succ->isSpecial, - Succ->Reg, Succ->Cost); + if (isNewLoad) + Succ->Dep->addPred(LoadSU, Succ->isCtrl, Succ->isSpecial, + Succ->Reg, Succ->Cost); } - NewSU->addPred(LoadSU, false, false); + if (isNewLoad) + NewSU->addPred(LoadSU, false, false); - AvailableQueue->addNode(LoadSU); + if (isNewLoad) + AvailableQueue->addNode(LoadSU); AvailableQueue->addNode(NewSU); ++NumUnfolds; @@ -519,8 +529,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (NewSU->NumSuccsLeft == 0) { NewSU->isAvailable = true; return NewSU; - } else - SU = NewSU; + } + SU = NewSU; } DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; @@ -566,7 +576,6 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVector &Copies) { - abort(); SUnit *CopyFromSU = NewSUnit(NULL); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; @@ -1055,9 +1064,11 @@ namespace { std::vector SethiUllmanNumbers; const TargetInstrInfo *TII; + const MRegisterInfo *MRI; public: - explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii) - : TII(tii) {} + explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii, + const MRegisterInfo *mri) + : TII(tii), MRI(mri) {} void initNodes(DenseMap > &sumap, std::vector &sunits) { @@ -1096,6 +1107,11 @@ namespace { // CopyToReg should be close to its uses to facilitate coalescing and // avoid spilling. return 0; + else if (Opc == TargetInstrInfo::EXTRACT_SUBREG || + Opc == TargetInstrInfo::INSERT_SUBREG) + // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to + // facilitate coalescing. + return 0; else if (SU->NumSuccs == 0) // If SU does not have a use, i.e. it doesn't produce a value that would // be consumed (e.g. store), then it terminates a chain of computation. @@ -1184,6 +1200,26 @@ static unsigned closestSucc(const SUnit *SU) { return MaxCycle; } +/// calcMaxScratches - Returns an cost estimate of the worse case requirement +/// for scratch registers. Live-in operands and live-out results don't count +/// since they are "fixed". +static unsigned calcMaxScratches(const SUnit *SU) { + unsigned Scratches = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl) continue; // ignore chain preds + if (I->Dep->Node->getOpcode() != ISD::CopyFromReg) + Scratches++; + } + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl) continue; // ignore chain succs + if (I->Dep->Node->getOpcode() != ISD::CopyToReg) + Scratches += 10; + } + return Scratches; +} + // Bottom up bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { // There used to be a special tie breaker here that looked for @@ -1226,14 +1262,23 @@ bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { if (LDist < RDist) return true; else if (LDist == RDist) { - if (left->Height > right->Height) + // Intuitively, it's good to push down instructions whose results are + // liveout so their long live ranges won't conflict with other values + // which are needed inside the BB. Further prioritize liveout instructions + // by the number of operands which are calculated within the BB. + unsigned LScratch = calcMaxScratches(left); + unsigned RScratch = calcMaxScratches(right); + if (LScratch > RScratch) return true; - else if (left->Height == right->Height) - if (left->Depth < right->Depth) + else if (LScratch == RScratch) + if (left->Height > right->Height) return true; - else if (left->Depth == right->Depth) - if (left->CycleBound > right->CycleBound) + else if (left->Height == right->Height) + if (left->Depth < right->Depth) return true; + else if (left->Depth == right->Depth) + if (left->CycleBound > right->CycleBound) + return true; } } return false; @@ -1248,7 +1293,8 @@ bool BURegReductionPriorityQueue::canClobber(SUnit *SU, SUnit *Op) { for (unsigned i = 0; i != NumOps; ++i) { if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) { SDNode *DU = SU->Node->getOperand(i).Val; - if (Op == (*SUnitMap)[DU][SU->InstanceNo]) + if ((*SUnitMap).find(DU) != (*SUnitMap).end() && + Op == (*SUnitMap)[DU][SU->InstanceNo]) return true; } } @@ -1270,6 +1316,33 @@ static bool hasCopyToRegUse(SUnit *SU) { return false; } +/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's +/// physical register def. +static bool canClobberPhysRegDefs(SUnit *SuccSU, SUnit *SU, + const TargetInstrInfo *TII, + const MRegisterInfo *MRI) { + SDNode *N = SuccSU->Node; + unsigned NumDefs = TII->getNumDefs(N->getTargetOpcode()); + const unsigned *ImpDefs = TII->getImplicitDefs(N->getTargetOpcode()); + if (!ImpDefs) + return false; + const unsigned *SUImpDefs = TII->getImplicitDefs(SU->Node->getTargetOpcode()); + if (!SUImpDefs) + return false; + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + MVT::ValueType VT = N->getValueType(i); + if (VT == MVT::Flag || VT == MVT::Other) + continue; + unsigned Reg = ImpDefs[i - NumDefs]; + for (;*SUImpDefs; ++SUImpDefs) { + unsigned SUReg = *SUImpDefs; + if (MRI->regsOverlap(Reg, SUReg)) + return true; + } + } + return false; +} + /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses /// it as a def&use operand. Add a pseudo control edge from it to the other /// node (if it won't create a cycle) so the two-address one will be scheduled @@ -1294,18 +1367,33 @@ void BURegReductionPriorityQueue::AddPseudoTwoAddrDeps() { for (unsigned j = 0; j != NumOps; ++j) { if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) { SDNode *DU = SU->Node->getOperand(j).Val; + if ((*SUnitMap).find(DU) == (*SUnitMap).end()) + continue; SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo]; if (!DUSU) continue; for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end(); I != E; ++I) { if (I->isCtrl) continue; SUnit *SuccSU = I->Dep; - // Don't constraint nodes with implicit defs. It can create cycles - // plus it may increase register pressures. - if (SuccSU == SU || SuccSU->hasPhysRegDefs) + if (SuccSU == SU) + continue; + // Be conservative. Ignore if nodes aren't at roughly the same + // depth and height. + if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1) continue; - // Be conservative. Ignore if nodes aren't at the same depth. - if (SuccSU->Depth != SU->Depth) + if (!SuccSU->Node || !SuccSU->Node->isTargetOpcode()) + continue; + // Don't constrain nodes with physical register defs if the + // predecessor can cloober them. + if (SuccSU->hasPhysRegDefs) { + if (canClobberPhysRegDefs(SuccSU, SU, TII, MRI)) + continue; + } + // Don't constraint extract_subreg / insert_subreg these may be + // coalesced away. We don't them close to their uses. + unsigned SuccOpc = SuccSU->Node->getTargetOpcode(); + if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || + SuccOpc == TargetInstrInfo::INSERT_SUBREG) continue; if ((!canClobber(SuccSU, DUSU) || (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || @@ -1490,8 +1578,9 @@ llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, SelectionDAG *DAG, MachineBasicBlock *BB) { const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo(); + const MRegisterInfo *MRI = DAG->getTarget().getRegisterInfo(); return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true, - new BURegReductionPriorityQueue(TII)); + new BURegReductionPriorityQueue(TII, MRI)); } llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,