//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sched-instrs"
#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
    InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag),
    LIS(lis), UnitLatencies(false), LoopRegs(MLI, MDT), FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");
}
/// Run - perform scheduling.
///
void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
                            MachineBasicBlock::iterator begin,
                            MachineBasicBlock::iterator end,
                            unsigned endcount) {
  BB = bb;
  Begin = begin;
  InsertPosIndex = endcount;

  // Check to see if the scheduler cares about latencies.
  UnitLatencies = ForceUnitLatencies();

  ScheduleDAG::Run(bb, end);
}
/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}
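// For illustration, given a hypothetical IR sequence such as
//   %i = ptrtoint i8* %p to i64
//   %j = add i64 %i, 16
//   %q = inttoptr i64 %j to i8*
// the loop above, starting from %j, steps through the add's left operand to
// %i, then hits the ptrtoint and returns the underlying pointer %p.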
/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call Value::getUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}
/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  MayAlias = true;
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}
void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
  LoopRegs.Deps.clear();
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch())
      LoopRegs.VisitLoop(ML);
}
/// Initialize the map with the number of registers.
void ScheduleDAGInstrs::Reg2SUnitsMap::setRegLimit(unsigned Limit) {
  PhysRegSet.setUniverse(Limit);
  SUnits.resize(Limit);
}
/// Clear the map without deallocating storage.
void ScheduleDAGInstrs::Reg2SUnitsMap::clear() {
  for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
    SUnits[*I].clear();
  }
  PhysRegSet.clear();
}
/// AddSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch, or when liveout registers are
/// used by instructions in the fallthrough block.
void ScheduleDAGInstrs::AddSchedBarrierDeps() {
  MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses[Reg].push_back(&ExitSU);
      else
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    SmallSet<unsigned, 8> Seen;
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (Seen.insert(Reg))
          Uses[Reg].push_back(&ExitSU);
      }
  }
}
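// Note: the &ExitSU entries recorded in Uses here are picked up later by
// addPhysRegDataDeps, which scans each register's use list; defs of live-out
// registers therefore acquire data edges to ExitSU like any other use.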
/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
                                           const MachineOperand &MO) {
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
  unsigned DataLatency = SU->Latency;

  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<SUnit*> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i];
      if (UseSU == SU)
        continue;
      unsigned LDataLatency = DataLatency;
      // Optionally add in a special extra latency for nodes that
      // feed addresses.
      // TODO: Perhaps we should get rid of
      // SpecialAddressLatency and just move this into
      // adjustSchedDependency for the targets that care about it.
      if (SpecialAddressLatency != 0 && !UnitLatencies &&
          UseSU != &ExitSU) {
        MachineInstr *UseMI = UseSU->getInstr();
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
        if (RegUseIndex >= 0 &&
            (UseMI->mayLoad() || UseMI->mayStore()) &&
            (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
            UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
          LDataLatency += SpecialAddressLatency;
      }
      // Adjust the dependence latency using operand def/use
      // information (if any), and then allow the target to
      // perform its own adjustments.
      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
      if (!UnitLatencies) {
        ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
      }
      UseSU->addPred(dep);
    }
  }
}
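// As a concrete (hypothetical) example of the backscheduling hook above: with
// DataLatency 2 and a target SpecialAddressLatency of 6, a def feeding the
// pointer operand of a load would get a data edge of latency 8, encouraging
// the scheduler to move the address computation earlier.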
/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    std::vector<SUnit *> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
      SUnit *DefSU = DefList[i];
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
        else {
          unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                 DefSU->getInstr());
          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
        }
      }
    }
  }

  if (!MO.isDef()) {
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses[MO.getReg()].push_back(SU);
  }
  else {
    addPhysRegDataDeps(SU, MO);

    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's defs.
    std::vector<SUnit *> &DefList = Defs[MO.getReg()];

    // If a def is going to wrap back around to the top of the loop,
    // backschedule it.
    if (!UnitLatencies && DefList.empty()) {
      LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
      if (I != LoopRegs.Deps.end()) {
        const MachineOperand *UseMO = I->second.first;
        unsigned Count = I->second.second;
        const MachineInstr *UseMI = UseMO->getParent();
        unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
        const MCInstrDesc &UseMCID = UseMI->getDesc();
        const TargetSubtargetInfo &ST =
          TM.getSubtarget<TargetSubtargetInfo>();
        unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
        // TODO: If we knew the total depth of the region here, we could
        // handle the case where the whole loop is inside the region but
        // is large enough that the isScheduleHigh trick isn't needed.
        if (UseMOIdx < UseMCID.getNumOperands()) {
          // Currently, we only support scheduling regions consisting of
          // single basic blocks. Check to see if the instruction is in
          // the same region by checking to see if it has the same parent.
          if (UseMI->getParent() != MI->getParent()) {
            unsigned Latency = SU->Latency;
            if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
              Latency += SpecialAddressLatency;
            // This is a wild guess as to the portion of the latency which
            // will be overlapped by work done outside the current
            // scheduling region.
            Latency -= std::min(Latency, Count);
            // Add the artificial edge.
            ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                /*Reg=*/0, /*isNormalMemory=*/false,
                                /*isMustAlias=*/false,
                                /*isArtificial=*/true));
          } else if (SpecialAddressLatency > 0 &&
                     UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
            // The entire loop body is within the current scheduling region
            // and the latency of this operation is assumed to be greater
            // than the latency of the loop.
            // TODO: Recursively mark data-edge predecessors as
            //       isScheduleHigh too.
            SU->isScheduleHigh = true;
          }
        }
        LoopRegs.Deps.erase(I);
      }
    }

    // Clear this register's use list.
    if (Uses.contains(MO.getReg()))
      Uses[MO.getReg()].clear();

    if (!MO.isDead())
      DefList.clear();

    // Calls will not be reordered because of chain dependencies (see
    // below). Since call operands are dead, calls may continue to be added
    // to the DefList making dependence checking quadratic in the size of
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    if (SU->isCall) {
      while (!DefList.empty() && DefList.back()->isCall)
        DefList.pop_back();
    }
    // Defs are pushed in the order they are visited and never reordered.
    DefList.push_back(SU);
  }
}
/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // SSA defs do not have output/anti dependencies.
  // The current operand is a def, so we have at least one.
  if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI == VRegDefs.end())
    VRegDefs.insert(VReg2SUnit(Reg, SU));
  else {
    SUnit *DefSU = DefI->SU;
    if (DefSU != SU && DefSU != &ExitSU) {
      unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                  DefSU->getInstr());
      DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
    }
    DefI->SU = SU;
  }
}
/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Lookup this operand's reaching definition.
  assert(LIS && "vreg dependencies require LiveIntervals");
  SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
  LiveInterval *LI = &LIS->getInterval(Reg);
  VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  // Phis and other noninstructions (after coalescing) have a NULL Def.
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      //
      // TODO: Handle "special" address latencies cleanly.
      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
      if (!UnitLatencies) {
        // Adjust the dependence latency using operand def/use information, then
        // allow the target to perform its own adjustments.
        ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      }
      SU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
  if (DefI != VRegDefs.end() && DefI->SU != SU)
    DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
}
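// For example, in a region containing (pseudo-code):
//   A: %vreg1 = ...
//   B: ...    = %vreg1
//   C: %vreg1 = ...
// the bottom-up walk visits C before B, so when B's use is processed, the
// data edge A->B comes from LiveIntervals' reaching def and the anti edge
// B->C comes from the VRegDefs entry recorded when C was visited.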
/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler
/// may hang onto SUnit pointers. We may relax this in the future by using SUnit
/// IDs instead of pointers.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = NewSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      ComputeLatency(SU);
  }
}
void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from bottom
  // to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = NULL;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setRegLimit(TRI->getNumRegs());
  Uses.setRegLimit(TRI->getNumRegs());

  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  // FIXME: Allow SparseSet to reserve space for the creation of virtual
  // registers during scheduling. Don't artificially inflate the Universe
  // because we want to assert that vregs are not created during DAG building.
  VRegDefs.setUniverse(MRI.getNumVirtRegs());

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  AddSchedBarrierDeps();
  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }

    assert(!MI->isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");

    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef())
          addVRegDefDeps(SU, j);
        else if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(SU, j);
      }
    }
    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for a true dependency of a Store followed by an
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass).
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmp
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  Defs.clear();
  Uses.clear();
  VRegDefs.clear();
  PendingLoads.clear();
  MISUnitMap.clear();
}
void ScheduleDAGInstrs::FinishBlock() {
  // Nothing to do.
}
void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
  // Compute the latency for the node.
  if (!InstrItins || InstrItins->isEmpty()) {
    SU->Latency = 1;

    // Simplistic target-independent heuristic: assume that loads take
    // extra time.
    if (SU->getInstr()->mayLoad())
      SU->Latency += 2;
  } else {
    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
  }
}
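// With no itinerary data, the heuristic above gives every node a latency of
// 1, with 2 extra cycles for loads (1 + 2 = 3).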
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  if (!InstrItins || InstrItins->isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    const MachineOperand &MO = DefMI->getOperand(DefIdx);
    if (MO.isReg() && MO.isImplicit() &&
        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
      // This is an implicit def, getOperandLatency() won't return the correct
      // latency. e.g.
      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
      // What we want is to compute latency between def of %D6/%D7 and use of
      // %Q3 instead.
      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
      if (DefMI->getOperand(Op2).isReg())
        DefIdx = Op2;
    }
    MachineInstr *UseMI = Use->getInstr();
    // For all uses of the register, calculate the maximum latency.
    int Latency = -1;
    if (UseMI) {
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
                                              UseMI, i);
        Latency = std::max(Latency, UseCycle);
      }
    } else {
      // If UseMI is null, then it must be a scheduling barrier.
      if (!InstrItins || InstrItins->isEmpty())
        return;
      unsigned DefClass = DefMI->getDesc().getSchedClass();
      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
    }

    // If we found a latency, then replace the existing dependence latency.
    if (Latency >= 0)
      dep.setLatency(Latency);
  }
}
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}
std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}
// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
  Begin = InsertPos;

  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue)
    BB->splice(InsertPos, BB, FirstDbgValue);

  // Then re-insert them according to the given schedule.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    if (SUnit *SU = Sequence[i])
      BB->splice(InsertPos, BB, SU->getInstr());
    else
      // Null SUnit* is a noop.
      EmitNoop();

    // Update the Begin iterator, as the first instruction in the block
    // may have been scheduled later.
    if (i == 0)
      Begin = prior(InsertPos);
  }

  // Reinsert any remaining debug_values.
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrivMI = P.second;
    BB->splice(++OrigPrivMI, BB, DbgValue);
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
  return BB;
}
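// A rough sketch of how a scheduler subclass drives this class per block
// (the concrete subclass, region bounds, and count are assumptions; see the
// post-RA scheduler for a real client):
//   Scheduler.StartBlock(MBB);                            // per-block setup
//   Scheduler.Run(MBB, RegionBegin, RegionEnd, EndCount); // build DAG, schedule
//   Scheduler.EmitSchedule();                             // splice MIs back in order
//   Scheduler.FinishBlock();                              // per-block cleanup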