1 //===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements the ScheduleDAG class, which is a base class used by
11 // scheduling implementation classes.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "pre-RA-sched"
16 #include "SDNodeDbgValue.h"
17 #include "ScheduleDAGSDNodes.h"
18 #include "InstrEmitter.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/Target/TargetMachine.h"
21 #include "llvm/Target/TargetInstrInfo.h"
22 #include "llvm/Target/TargetLowering.h"
23 #include "llvm/Target/TargetRegisterInfo.h"
24 #include "llvm/Target/TargetSubtarget.h"
25 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/raw_ostream.h"
34 STATISTIC(LoadsClustered, "Number of loads clustered together");
// NOTE(review): this excerpt elides interior lines (the embedded original
// numbering skips); the constructor's initializer list/body is not visible.
36 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
40 /// Run - perform scheduling.
// Run - entry point for scheduling this DAG into basic block 'bb' starting at
// 'insertPos'. Delegates to the base-class driver; the elided line(s)
// presumably record 'dag' into a member before running — TODO confirm.
42 void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
43 MachineBasicBlock::iterator insertPos) {
45 ScheduleDAG::Run(bb, insertPos);
48 /// NewSUnit - Creates a new SUnit and return a ptr to it.
50 SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
// Addr is used to detect reallocation of the SUnits vector; the elided
// line(s) presumably capture &SUnits[0] when the vector is non-empty —
// TODO confirm against the full source.
52 const SUnit *Addr = 0;
56 SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
// Reallocation would invalidate all outstanding SUnit*'s; see the matching
// reserve() in BuildSchedUnits.
57 assert((Addr == 0 || Addr == &SUnits[0]) &&
58 "SUnits std::vector reallocated on the fly!");
// A fresh SUnit is its own OrigNode (i.e. it is not a clone).
59 SUnits.back().OrigNode = &SUnits.back();
60 SUnit *SU = &SUnits.back();
// Ask the target how it prefers this node to be scheduled (e.g. for
// latency vs. register pressure).
61 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
62 SU->SchedulingPref = TLI.getSchedulingPreference(N);
// Clone - create a duplicate SUnit for Old's node, copying the scheduling
// attributes that must agree between the clone and the original. The clone
// shares Old's OrigNode so both map back to the same original unit.
// NOTE(review): the elided tail presumably marks the clone and returns SU —
// TODO confirm.
66 SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
67 SUnit *SU = NewSUnit(Old->getNode());
68 SU->OrigNode = Old->OrigNode;
69 SU->Latency = Old->Latency;
70 SU->isTwoAddress = Old->isTwoAddress;
71 SU->isCommutable = Old->isCommutable;
72 SU->hasPhysRegDefs = Old->hasPhysRegDefs;
73 SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
74 SU->SchedulingPref = Old->SchedulingPref;
79 /// CheckForPhysRegDependency - Check if the dependency between def and use of
80 /// a specified operand is a physical register dependency. If so, returns the
81 /// register and the cost of copying the register.
82 static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
83 const TargetRegisterInfo *TRI,
84 const TargetInstrInfo *TII,
85 unsigned &PhysReg, int &Cost) {
// Only CopyToReg's value operand (operand 2) can create a physreg dep;
// the elided line is presumably an early 'return' — TODO confirm.
86 if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
89 unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
// Copies into virtual registers are not physical-register dependencies.
90 if (TargetRegisterInfo::isVirtualRegister(Reg))
93 unsigned ResNo = User->getOperand(2).getResNo();
94 if (Def->isMachineOpcode()) {
95 const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
// The copied value is a physreg dep only if it comes from one of Def's
// implicit defs (results past the explicit defs) matching Reg.
96 if (ResNo >= II.getNumDefs() &&
97 II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
// Report the register class copy cost; the elided line presumably also
// sets PhysReg = Reg — TODO confirm.
99 const TargetRegisterClass *RC =
100 TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
101 Cost = RC->getCopyCost();
// AddFlags - morph node N in place so that it optionally produces an
// MVT::Flag result (when AddFlag) and optionally consumes the incoming
// 'Flag' value as an extra operand. Used by ClusterNeighboringLoads to
// chain loads together. NOTE(review): the signature's trailing
// 'SelectionDAG *DAG' parameter and the guards around the Flag
// push_backs are on elided lines — TODO confirm.
106 static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
// Rebuild the node's value list, appending MVT::Flag when requested
// (the 'if (AddFlag)' guard is presumably on the elided line 111/112).
108 SmallVector<EVT, 4> VTs;
109 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
110 VTs.push_back(N->getValueType(i));
112 VTs.push_back(MVT::Flag);
// Rebuild the operand list; the incoming Flag is presumably appended when
// non-null on elided lines.
113 SmallVector<SDValue, 4> Ops;
114 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
115 Ops.push_back(N->getOperand(i));
// Morph N in place with the augmented value/operand lists.
118 SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
119 DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
122 /// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
123 /// This function finds loads of the same base and different offsets. If the
124 /// offsets are not far apart (target specific), it add MVT::Flag inputs and
125 /// outputs to ensure they are scheduled together and in order. This
126 /// optimization may benefit some targets by improving cache locality.
127 void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
128 SmallPtrSet<SDNode*, 16> Visited;
129 SmallVector<int64_t, 4> Offsets;
130 DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
// Scan every node, looking for machine loads to use as a clustering base.
131 for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
132 E = DAG->allnodes_end(); NI != E; ++NI) {
// NOTE(review): 'Node' is declared on an elided line (presumably &*NI),
// and the elided lines after the TID lookup presumably skip non-load
// opcodes — TODO confirm.
134 if (!Node || !Node->isMachineOpcode())
137 unsigned Opc = Node->getMachineOpcode();
138 const TargetInstrDesc &TID = TII->get(Opc);
// The chain, if present, is the last operand of the load.
143 unsigned NumOps = Node->getNumOperands();
144 if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
145 Chain = Node->getOperand(NumOps-1).getNode();
149 // Look for other loads of the same chain. Find loads that are loading from
150 // the same base pointer and different offsets.
154 bool Cluster = false;
// Walk users of the same chain; 'Base' is presumably set to Node on an
// elided line — TODO confirm.
157 for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
// Skip the base load itself and any load we have already considered.
160 if (User == Node || !Visited.insert(User))
162 int64_t Offset1, Offset2;
163 if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
165 // FIXME: Should be ok if the addresses are identical. But earlier
166 // optimizations really should have eliminated one of the loads.
// Record both offsets; insert() returns false for duplicates, so Offset1
// is only pushed the first time the base load is seen.
168 if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
169 Offsets.push_back(Offset1);
170 O2SMap.insert(std::make_pair(Offset2, User));
171 Offsets.push_back(Offset2);
// Track which load has the smallest offset (the cluster leader).
172 if (Offset2 < Offset1) {
174 BaseOffset = Offset2;
176 BaseOffset = Offset1;
184 // Sort them in increasing order.
185 std::sort(Offsets.begin(), Offsets.end());
187 // Check if the loads are close enough.
188 SmallVector<SDNode*, 4> Loads;
189 unsigned NumLoads = 0;
190 int64_t BaseOff = Offsets[0];
191 SDNode *BaseLoad = O2SMap[BaseOff];
192 Loads.push_back(BaseLoad);
// Accept loads in increasing offset order until the target says they are
// too far apart to be worth clustering.
193 for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
194 int64_t Offset = Offsets[i];
195 SDNode *Load = O2SMap[Offset];
196 if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
198 break; // Stop right here. Ignore loads that are further away.
199 Loads.push_back(Load);
206 // Cluster loads by adding MVT::Flag outputs and inputs. This also
207 // ensures they are scheduled in order of increasing addresses.
208 SDNode *Lead = Loads[0];
// The leader gets only an outgoing flag; each subsequent load consumes the
// previous load's flag and (except the last) produces one of its own.
209 AddFlags(Lead, SDValue(0,0), true, DAG);
210 SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
211 for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
212 bool OutFlag = i < e-1;
213 SDNode *Load = Loads[i];
214 AddFlags(Load, InFlag, OutFlag, DAG);
216 InFlag = SDValue(Load, Load->getNumValues()-1);
// BuildSchedUnits - create one SUnit per schedulable node (or per group of
// flag-connected nodes), walking the DAG depth-first from the root.
222 void ScheduleDAGSDNodes::BuildSchedUnits() {
223 // During scheduling, the NodeId field of SDNode is used to map SDNodes
224 // to their associated SUnits by holding SUnits table indices. A value
225 // of -1 means the SDNode does not yet have an associated SUnit.
// Count the nodes (the loop body incrementing NumNodes and resetting
// NodeIds is on elided lines — TODO confirm).
226 unsigned NumNodes = 0;
227 for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
228 E = DAG->allnodes_end(); NI != E; ++NI) {
233 // Reserve entries in the vector for each of the SUnits we are creating. This
234 // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
236 // FIXME: Multiply by 2 because we may clone nodes during scheduling.
237 // This is a temporary workaround.
238 SUnits.reserve(NumNodes * 2);
240 // Add all nodes in depth first order.
241 SmallVector<SDNode*, 64> Worklist;
242 SmallPtrSet<SDNode*, 64> Visited;
243 Worklist.push_back(DAG->getRoot().getNode());
244 Visited.insert(DAG->getRoot().getNode());
246 while (!Worklist.empty()) {
247 SDNode *NI = Worklist.pop_back_val();
249 // Add all operands to the worklist unless they've already been added.
250 for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
251 if (Visited.insert(NI->getOperand(i).getNode()))
252 Worklist.push_back(NI->getOperand(i).getNode());
// Passive nodes (constants, registers, etc.) never get SUnits.
254 if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
257 // If this node has already been processed, stop now.
258 if (NI->getNodeId() != -1) continue;
260 SUnit *NodeSUnit = NewSUnit(NI);
262 // See if anything is flagged to this node, if so, add them to flagged
263 // nodes. Nodes can have at most one flag input and one flag output. Flags
264 // are required to be the last operand and result of a node.
266 // Scan up to find flagged preds.
// NOTE(review): 'N' is initialized on an elided line (presumably to NI).
268 while (N->getNumOperands() &&
269 N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
// Each flagged predecessor joins this SUnit.
270 N = N->getOperand(N->getNumOperands()-1).getNode();
271 assert(N->getNodeId() == -1 && "Node already inserted!");
272 N->setNodeId(NodeSUnit->NodeNum);
275 // Scan down to find any flagged succs.
277 while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
278 SDValue FlagVal(N, N->getNumValues()-1);
280 // There are either zero or one users of the Flag result.
281 bool HasFlagUse = false;
// Find the (unique) user of the flag value, if any, and pull it into this
// SUnit; the N-advancing assignment is presumably on an elided line.
282 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
284 if (FlagVal.isOperandOf(*UI)) {
286 assert(N->getNodeId() == -1 && "Node already inserted!");
287 N->setNodeId(NodeSUnit->NodeNum);
291 if (!HasFlagUse) break;
294 // If there are flag operands involved, N is now the bottom-most node
295 // of the sequence of nodes that are flagged together.
297 NodeSUnit->setNode(N);
298 assert(N->getNodeId() == -1 && "Node already inserted!");
299 N->setNodeId(NodeSUnit->NodeNum);
301 // Assign the Latency field of NodeSUnit using target-provided information.
302 ComputeLatency(NodeSUnit);
// AddSchedEdges - add dependence edges (data, chain, physreg) between the
// SUnits built by BuildSchedUnits, and derive per-SUnit flags
// (isTwoAddress, isCommutable, hasPhysRegDefs/Clobbers).
306 void ScheduleDAGSDNodes::AddSchedEdges() {
307 const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
309 // Check to see if the scheduler cares about latencies.
310 bool UnitLatencies = ForceUnitLatencies();
312 // Pass 2: add the preds, succs, etc.
313 for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
314 SUnit *SU = &SUnits[su];
315 SDNode *MainNode = SU->getNode();
// Derive two-address / commutable flags from the instruction description.
317 if (MainNode->isMachineOpcode()) {
318 unsigned Opc = MainNode->getMachineOpcode();
319 const TargetInstrDesc &TID = TII->get(Opc);
320 for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
321 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
322 SU->isTwoAddress = true;
326 if (TID.isCommutable())
327 SU->isCommutable = true;
330 // Find all predecessors and successors of the group.
331 for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
332 if (N->isMachineOpcode() &&
333 TII->get(N->getMachineOpcode()).getImplicitDefs()) {
334 SU->hasPhysRegClobbers = true;
// Only count implicit defs whose values are actually used.
335 unsigned NumUsed = InstrEmitter::CountResults(N);
336 while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
337 --NumUsed; // Skip over unused values at the end.
338 if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
339 SU->hasPhysRegDefs = true;
// Add an edge from each scheduled operand's SUnit to this one.
342 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
343 SDNode *OpN = N->getOperand(i).getNode();
344 if (isPassiveNode(OpN)) continue; // Not scheduled.
345 SUnit *OpSU = &SUnits[OpN->getNodeId()];
346 assert(OpSU && "Node has no SUnit!");
347 if (OpSU == SU) continue; // In the same group.
349 EVT OpVT = N->getOperand(i).getValueType();
350 assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
351 bool isChain = OpVT == MVT::Other;
// NOTE(review): 'Cost' is declared on an elided line — TODO confirm.
353 unsigned PhysReg = 0;
355 // Determine if this is a physical register dependency.
356 CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
357 assert((PhysReg == 0 || !isChain) &&
358 "Chain dependence via physreg data?");
359 // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
360 // emits a copy from the physical register to a virtual register unless
361 // it requires a cross class copy (cost < 0). That means we are only
362 // treating "expensive to copy" register dependency as physical register
363 // dependency. This may change in the future though.
// Build the edge; const_cast lets latency adjustment mutate it in place
// before it is added to the SUnit (addPred is presumably on an elided
// line — TODO confirm).
367 const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
368 OpSU->Latency, PhysReg);
369 if (!isChain && !UnitLatencies) {
370 ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
371 ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
380 /// BuildSchedGraph - Build the SUnit graph from the selection dag that we
381 /// are input. This SUnit graph is similar to the SelectionDAG, but
382 /// excludes nodes that aren't interesting to scheduling, and represents
383 /// flagged together nodes with a single SUnit.
// NOTE(review): the calls to BuildSchedUnits() and AddSchedEdges() are on
// elided lines here — TODO confirm against the full source.
384 void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
385 // Cluster loads from "near" addresses into combined SUnits.
386 ClusterNeighboringLoads();
387 // Populate the SUnits array.
389 // Compute all the scheduling dependencies between nodes.
// ComputeLatency - set SU->Latency from the target's instruction
// itineraries; falls back (on elided lines, presumably to a default
// latency and early return — TODO confirm) when unit latencies are forced
// or no itinerary data exists.
393 void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
394 // Check to see if the scheduler cares about latencies.
395 if (ForceUnitLatencies()) {
400 const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
401 if (InstrItins.isEmpty()) {
406 // Compute the latency for the node. We use the sum of the latencies for
407 // all nodes flagged together into this SUnit.
// NOTE(review): SU->Latency is presumably zeroed on an elided line before
// this accumulation — TODO confirm.
409 for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
410 if (N->isMachineOpcode()) {
411 SU->Latency += InstrItins.
412 getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
// ComputeOperandLatency - refine the latency of data edge 'dep' between
// Def and its user Use (operand OpIdx) using per-operand cycle information
// from the itineraries. No-ops (via elided early returns) when unit
// latencies are forced, itineraries are empty, or the edge is not a data
// dependence.
416 void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
417 unsigned OpIdx, SDep& dep) const{
418 // Check to see if the scheduler cares about latencies.
419 if (ForceUnitLatencies())
422 const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
423 if (InstrItins.isEmpty())
426 if (dep.getKind() != SDep::Data)
// DefIdx is which of Def's results feeds Use's operand OpIdx.
429 unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
430 if (Def->isMachineOpcode() && Use->isMachineOpcode()) {
431 const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
// Implicit defs (beyond the explicit ones) have no per-operand cycles.
432 if (DefIdx >= II.getNumDefs())
434 int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
437 const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
438 int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
// Latency is the distance between when the def is produced and when the
// use reads it (+1); negative-cycle guards are presumably on elided lines.
440 int Latency = DefCycle - UseCycle + 1;
442 dep.setLatency(Latency);
// dumpNode - debug-print the SUnit's node and every node flagged to it.
// A null node means the SUnit is a physical register copy.
447 void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
448 if (!SU->getNode()) {
449 dbgs() << "PHYS REG COPY\n";
453 SU->getNode()->dump(DAG);
// Collect flagged nodes, then print them in reverse collection order
// (i.e. bottom-up through the flag chain).
455 SmallVector<SDNode *, 4> FlaggedNodes;
456 for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
457 FlaggedNodes.push_back(N);
458 while (!FlaggedNodes.empty()) {
460 FlaggedNodes.back()->dump(DAG);
462 FlaggedNodes.pop_back();
// Comparator used by std::sort in EmitSchedule to order (source-order,
// MachineInstr*) pairs by ascending source order number.
// NOTE(review): the enclosing struct declaration (OrderSorter, per its use
// below) is on an elided line — TODO confirm.
468 bool operator()(const std::pair<unsigned, MachineInstr*> &A,
469 const std::pair<unsigned, MachineInstr*> &B) {
470 return A.first < B.first;
475 // ProcessSourceNode - Process nodes with source order numbers. These are added
476 // to a vector which EmitSchedule use to determine how to insert dbg_value
477 // instructions in the right order.
478 static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
479 InstrEmitter &Emitter,
480 DenseMap<SDValue, unsigned> &VRBaseMap,
481 SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
482 SmallSet<unsigned, 8> &Seen) {
// Skip nodes with no order number or whose order was already recorded
// (the early return is presumably on an elided line).
483 unsigned Order = DAG->GetOrdering(N);
484 if (!Order || !Seen.insert(Order))
487 MachineBasicBlock *BB = Emitter.getBlock();
488 if (BB->empty() || BB->back().isPHI()) {
489 // Did not insert any instruction.
490 Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
// Otherwise associate this order number with the last emitted instruction.
494 Orders.push_back(std::make_pair(Order, &BB->back()));
495 if (!N->getHasDebugValue())
497 // Opportunistically insert immediate dbg_value uses, i.e. those with source
498 // order number right after the N.
499 MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
500 SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
501 for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
502 if (DVs[i]->isInvalidated())
504 unsigned DVOrder = DVs[i]->getOrder();
// Only emit the dbg_value that immediately follows N in source order;
// once emitted it is invalidated so EmitSchedule won't emit it again.
505 if (DVOrder == ++Order) {
506 MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
508 Orders.push_back(std::make_pair(DVOrder, DbgMI));
509 BB->insert(InsertPos, DbgMI);
511 DVs[i]->setIsInvalidated();
517 /// EmitSchedule - Emit the machine code in scheduled order.
518 MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
519 InstrEmitter Emitter(BB, InsertPos);
// Maps SDValues / copy-SUnits to the virtual registers holding them.
520 DenseMap<SDValue, unsigned> VRBaseMap;
521 DenseMap<SUnit*, unsigned> CopyVRBaseMap;
// (source-order, instruction) pairs, used below to place dbg_values.
522 SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
523 SmallSet<unsigned, 8> Seen;
524 bool HasDbg = DAG->hasDebugValues();
526 // If this is the first BB, emit byval parameter dbg_value's.
527 if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
528 SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
529 SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
530 for (; PDI != PDE; ++PDI) {
531 MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
533 BB->insert(BB->end(), DbgMI);
// Emit each SUnit in the scheduled Sequence.
537 for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
538 SUnit *SU = Sequence[i];
540 // Null SUnit* is a noop.
545 // For pre-regalloc scheduling, create instructions corresponding to the
546 // SDNode and any flagged SDNodes and append them to the block.
547 if (!SU->getNode()) {
// SUnits with no node are physical register copies inserted by the
// scheduler itself.
549 EmitPhysRegCopy(SU, CopyVRBaseMap);
// Emit flagged predecessors first (collected bottom-up, emitted in
// reverse, i.e. top-down), then the SUnit's own node.
553 SmallVector<SDNode *, 4> FlaggedNodes;
554 for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
555 N = N->getFlaggedNode())
556 FlaggedNodes.push_back(N);
557 while (!FlaggedNodes.empty()) {
558 SDNode *N = FlaggedNodes.back();
559 Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
561 // Remember the source order of the inserted instruction.
563 ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
564 FlaggedNodes.pop_back();
566 Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
568 // Remember the source order of the inserted instruction.
570 ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
574 // Insert all the dbg_values which have not already been inserted in source
// Find the insertion point at the start of the block, after any PHIs
// (the BBBegin increment is presumably on an elided line).
577 MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin();
578 while (BBBegin != BB->end() && BBBegin->isPHI())
581 // Sort the source order instructions and use the order to insert debug
583 std::sort(Orders.begin(), Orders.end(), OrderSorter());
585 SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
586 SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
587 // Now emit the rest according to source order.
588 unsigned LastOrder = 0;
589 MachineInstr *LastMI = 0;
590 for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
591 unsigned Order = Orders[i].first;
592 MachineInstr *MI = Orders[i].second;
593 // Insert all SDDbgValue's whose order(s) are before "Order".
596 MachineBasicBlock *MIBB = MI->getParent();
598 unsigned LastDIOrder = 0;
// Consume dbg_values in [LastOrder, Order) and place each either at the
// block start (when MI is null) or right after MI.
601 (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
603 assert((*DI)->getOrder() >= LastDIOrder &&
604 "SDDbgValue nodes must be in source order!");
605 LastDIOrder = (*DI)->getOrder();
607 if ((*DI)->isInvalidated())
609 MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
612 // Insert to start of the BB (after PHIs).
613 BB->insert(BBBegin, DbgMI);
615 MachineBasicBlock::iterator Pos = MI;
616 MIBB->insert(llvm::next(Pos), DbgMI);
623 // Add trailing DbgValue's before the terminator. FIXME: May want to add
624 // some of them before one or more conditional branches?
626 MachineBasicBlock *InsertBB = Emitter.getBlock();
627 MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
628 if (!(*DI)->isInvalidated()) {
629 MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap);
631 InsertBB->insert(Pos, DbgMI);
// Hand the (possibly new) current block and insert position back to the
// caller via the member variables.
637 BB = Emitter.getBlock();
638 InsertPos = Emitter.getInsertPos();