+static bool tryGreater(int TryVal, int CandVal,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason) {
+ if (TryVal > CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal < CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ Cand.setRepeat(Reason);
+ return false;
+}
+
+static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ SchedBoundary &Zone) {
+ if (Zone.isTop()) {
+ if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
+ if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
+ return true;
+ }
+ if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, GenericSchedulerBase::TopPathReduce))
+ return true;
+ }
+ else {
+ if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
+ if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
+ return true;
+ }
+ if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, GenericSchedulerBase::BotPathReduce))
+ return true;
+ }
+ return false;
+}
+
+static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
+ bool IsTop) {
+ DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
+ << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
+}
+
+void GenericScheduler::initialize(ScheduleDAGMI *dag) {
+ assert(dag->hasVRegLiveness() &&
+ "(PreRA)GenericScheduler needs vreg liveness");
+ DAG = static_cast<ScheduleDAGMILive*>(dag);
+ SchedModel = DAG->getSchedModel();
+ TRI = DAG->TRI;
+
+ Rem.init(DAG, SchedModel);
+ Top.init(DAG, SchedModel, &Rem);
+ Bot.init(DAG, SchedModel, &Rem);
+
+ // Initialize resource counts.
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+ if (!Top.HazardRec) {
+ Top.HazardRec =
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
+ }
+ if (!Bot.HazardRec) {
+ Bot.HazardRec =
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
+ }
+}
+
+/// Initialize the per-region scheduling policy.
+void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned NumRegionInstrs) {
+ const MachineFunction &MF = *Begin->getParent()->getParent();
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+
+ // Avoid setting up the register pressure tracker for small regions to save
+ // compile time. As a rough heuristic, only track pressure when the number of
+ // schedulable instructions exceeds half the integer register file.
+ RegionPolicy.ShouldTrackPressure = true;
+ for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
+ MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
+ if (TLI->isTypeLegal(LegalIntVT)) {
+ unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
+ TLI->getRegClassFor(LegalIntVT));
+ RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
+ }
+ }
+
+ // For generic targets, we default to bottom-up, because it's simpler and more
+ // compile-time optimizations have been implemented in that direction.
+ RegionPolicy.OnlyBottomUp = true;
+
+ // Allow the subtarget to override default policy.
+ MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End,
+ NumRegionInstrs);
+
+ // After subtarget overrides, apply command line options.
+ if (!EnableRegPressure)
+ RegionPolicy.ShouldTrackPressure = false;
+
+ // Check -misched-topdown/bottomup can force or unforce scheduling direction.
+ // e.g. -misched-bottomup=false allows scheduling in both directions.
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ if (ForceBottomUp.getNumOccurrences() > 0) {
+ RegionPolicy.OnlyBottomUp = ForceBottomUp;
+ if (RegionPolicy.OnlyBottomUp)
+ RegionPolicy.OnlyTopDown = false;
+ }
+ if (ForceTopDown.getNumOccurrences() > 0) {
+ RegionPolicy.OnlyTopDown = ForceTopDown;
+ if (RegionPolicy.OnlyTopDown)
+ RegionPolicy.OnlyBottomUp = false;
+ }
+}
+
+/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
+/// critical path by more cycles than it takes to drain the instruction buffer.
+/// We estimate an upper bounds on in-flight instructions as:
+///
+/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
+/// InFlightIterations = AcyclicPath / CyclesPerIteration
+/// InFlightResources = InFlightIterations * LoopResources
+///
+/// TODO: Check execution resources in addition to IssueCount.
+void GenericScheduler::checkAcyclicLatency() {
+ if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
+ return;
+
+ // Scaled number of cycles per loop iteration.
+ unsigned IterCount =
+ std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
+ Rem.RemIssueCount);
+ // Scaled acyclic critical path.
+ unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
+ // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
+ unsigned InFlightCount =
+ (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
+ unsigned BufferLimit =
+ SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
+
+ Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
+
+ DEBUG(dbgs() << "IssueCycles="
+ << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
+ << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
+ << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
+ << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
+ << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
+ if (Rem.IsAcyclicLatencyLimited)
+ dbgs() << " ACYCLIC LATENCY LIMIT\n");
+}
+
+void GenericScheduler::registerRoots() {
+ Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+ // Some roots may not feed into ExitSU. Check all of them in case.
+ for (std::vector<SUnit*>::const_iterator
+ I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
+ if ((*I)->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = (*I)->getDepth();
+ }
+ DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
+ if (DumpCriticalPathLength) {
+ errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
+ }
+
+ if (EnableCyclicPath) {
+ Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
+ checkAcyclicLatency();
+ }
+}
+
+static bool tryPressure(const PressureChange &TryP,
+ const PressureChange &CandP,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason) {
+ int TryRank = TryP.getPSetOrMax();
+ int CandRank = CandP.getPSetOrMax();
+ // If both candidates affect the same set, go with the smallest increase.
+ if (TryRank == CandRank) {
+ return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
+ Reason);
+ }
+ // If one candidate decreases and the other increases, go with it.
+ // Invalid candidates have UnitInc==0.
+ if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
+ Reason)) {
+ return true;
+ }
+ // If the candidates are decreasing pressure, reverse priority.
+ if (TryP.getUnitInc() < 0)
+ std::swap(TryRank, CandRank);
+ return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
+}
+
+static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+ return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
+}
+
+/// Minimize physical register live ranges. Regalloc wants them adjacent to
+/// their physreg def/use.
+///
+/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
+/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
+/// with the operation that produces or consumes the physreg. We'll do this when
+/// regalloc has support for parallel copies.
+static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+ const MachineInstr *MI = SU->getInstr();
+ if (!MI->isCopy())
+ return 0;
+
+ unsigned ScheduledOper = isTop ? 1 : 0;
+ unsigned UnscheduledOper = isTop ? 0 : 1;
+ // If we have already scheduled the physreg produce/consumer, immediately
+ // schedule the copy.
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(ScheduledOper).getReg()))
+ return 1;
+ // If the physreg is at the boundary, defer it. Otherwise schedule it
+ // immediately to free the dependent. We can hoist the copy later.
+ bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(UnscheduledOper).getReg()))
+ return AtBoundary ? -1 : 1;
+ return 0;
+}
+
+/// Apply a set of heursitics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending.
+/// \param RPTracker describes reg pressure within the scheduled zone.
+/// \param TempTracker is a scratch pressure tracker to reuse in queries.
+void GenericScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker) {
+
+ if (DAG->isTrackingPressure()) {
+ // Always initialize TryCand's RPDelta.
+ if (Zone.isTop()) {
+ TempTracker.getMaxDownwardPressureDelta(
+ TryCand.SU->getInstr(),
+ TryCand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ }
+ else {
+ if (VerifyScheduling) {
+ TempTracker.getMaxUpwardPressureDelta(
+ TryCand.SU->getInstr(),
+ &DAG->getPressureDiff(TryCand.SU),
+ TryCand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ }
+ else {
+ RPTracker.getUpwardPressureDelta(
+ TryCand.SU->getInstr(),
+ DAG->getPressureDiff(TryCand.SU),
+ TryCand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ }
+ }
+ }
+ DEBUG(if (TryCand.RPDelta.Excess.isValid())
+ dbgs() << " SU(" << TryCand.SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
+ << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
+
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return;
+ }
+
+ if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
+ biasPhysRegCopy(Cand.SU, Zone.isTop()),
+ TryCand, Cand, PhysRegCopy))
+ return;
+
+ // Avoid exceeding the target's limit.
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
+ Cand.RPDelta.Excess,
+ TryCand, Cand, RegExcess))
+ return;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
+ Cand.RPDelta.CriticalMax,
+ TryCand, Cand, RegCritical))
+ return;
+
+ // For loops that are acyclic path limited, aggressively schedule for latency.
+ // This can result in very long dependence chains scheduled in sequence, so
+ // once every cycle (when CurrMOps == 0), switch to normal heuristics.
+ if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
+ && tryLatency(TryCand, Cand, Zone))
+ return;
+
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
+ Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return;
+
+ // Keep clustered nodes together to encourage downstream peephole
+ // optimizations which may reduce resource requirements.
+ //
+ // This is a best effort to set things up for a post-RA pass. Optimizations
+ // like generating loads of multiple registers should ideally be done within
+ // the scheduler pass by combining the loads during DAG postprocessing.
+ const SUnit *NextClusterSU =
+ Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
+ TryCand, Cand, Cluster))
+ return;
+
+ // Weak edges are for clustering and other constraints.
+ if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
+ getWeakLeft(Cand.SU, Zone.isTop()),
+ TryCand, Cand, Weak)) {
+ return;
+ }
+ // Avoid increasing the max pressure of the entire region.
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
+ Cand.RPDelta.CurrentMax,
+ TryCand, Cand, RegMax))
+ return;
+
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return;
+
+ // Avoid serializing long latency dependence chains.
+ // For acyclic path limited loops, latency was already checked above.
+ if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
+ && tryLatency(TryCand, Cand, Zone)) {
+ return;
+ }
+
+ // Prefer immediate defs/users of the last scheduled instruction. This is a
+ // local pressure avoidance strategy that also makes the machine code
+ // readable.
+ if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
+ TryCand, Cand, NextDefUse))
+ return;
+
+ // Fall through to original instruction order.
+ if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ }
+}
+
+/// Pick the best candidate from the queue.