//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Machine Scheduler interface
// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "R600MachineScheduler.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

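// OccupedSlotsMask keeps one bit per X/Y/Z/W channel of the instruction
// group being built; 15 (all four bits set) marks the group as full.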
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  TII = static_cast<const R600InstrInfo*>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  MRI = &DAG->MRI;
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupedSlotsMask = 15;
  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  InstKindLimit[IDOther] = 32;

  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
  InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
}

void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst)
{
  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
  QSrc.clear();
}

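// Pick the next node bottom-up: pending AR defs first, then a unit of the
// current clause type (ALU/Fetch/Other) for as long as a clause switch is
// not allowed or required, and pending AR uses last.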
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = 0;
  NextInstKind = IDOther;

  IsTopNode = false;

  // check if we might want to switch current clause type
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch].empty() || !Available[IDOther].empty());

  // We want to schedule AR defs as soon as possible to make sure they aren't
  // put in a different ALU clause from their uses.
  if (!SU && !UnscheduledARDefs.empty()) {
    SU = UnscheduledARDefs[0];
    UnscheduledARDefs.erase(UnscheduledARDefs.begin());
    NextInstKind = IDAlu;
  }

  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // try to pick ALU
    SU = pickAlu();
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (SU) {
      if (CurEmitted >= InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // try to pick FETCH
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // try to pick other
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  // We want to schedule the AR uses as late as possible to make sure that
  // the AR defs have been released.
  if (!SU && !UnscheduledARUses.empty()) {
    SU = UnscheduledARUses[0];
    UnscheduledARUses.erase(UnscheduledARUses.begin());
    NextInstKind = IDAlu;
  }

  DEBUG(
      if (SU) {
        dbgs() << " ** Pick node **\n";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE \n";
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}

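// Helpers to purge already-scheduled units from a queue. Note that the
// predicate returns true for units that *have* been scheduled, which is
// what remove_if needs in order to drop them.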
static bool IsUnScheduled(const SUnit *SU) {
  return SU->isScheduled;
}

static void Filter(std::vector<SUnit *> &List) {
  List.erase(std::remove_if(List.begin(), List.end(), IsUnScheduled), List.end());
}

void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  // Scheduled units may still sit in the ALU queues; purge them first.
  for (unsigned i = 0; i < AluLast; i++) {
    Filter(AvailableAlus[i]);
  }

  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask = 15;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
          E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        // A literal operand occupies an extra slot in the clause.
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
}

static bool
isPhysicalRegCopy(MachineInstr *MI) {
  if (MI->getOpcode() != AMDGPU::COPY)
    return false;

  return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
}

void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  DEBUG(dbgs() << "Top Releasing "; SU->dump(DAG););
}

void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
  DEBUG(dbgs() << "Bottom Releasing "; SU->dump(DAG););
  if (isPhysicalRegCopy(SU->getInstr())) {
    PhysicalRegCopy.push_back(SU);
    return;
  }

  int IK = getInstKind(SU);

  // Check for AR register defines
  for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
                                        E = SU->getInstr()->operands_end();
                                        I != E; ++I) {
    if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
      if (I->isDef()) {
        UnscheduledARDefs.push_back(SU);
      } else {
        UnscheduledARUses.push_back(SU);
      }
      return;
    }
  }

  // There is no export clause, we can schedule one as soon as it's ready
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
}

bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
    return RC->contains(Reg);
  } else {
    return MRI->getRegClass(Reg) == RC;
  }
}

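// Classify an ALU instruction by the slot(s) it can occupy: a fixed X/Y/Z/W
// channel, a whole instruction group (XYZW), any free slot, a predicate, or
// a unit the scheduler can discard.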
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  switch (MI->getOpcode()) {
  case AMDGPU::PRED_X:
    return AluPredX;
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT_4:
    return AluT_XYZW;
  case AMDGPU::COPY:
    if (MI->getOperand(1).isUndef()) {
      // MI will become a KILL, don't consider it in scheduling
      return AluDiscarded;
    }
  default:
    break;
  }

  // Does the instruction take a whole IG?
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()))
    return AluT_XYZW;

  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  case AMDGPU::sub0: return AluT_X;
  case AMDGPU::sub1: return AluT_Y;
  case AMDGPU::sub2: return AluT_Z;
  case AMDGPU::sub3: return AluT_W;
  default: break;
  }

  // Is the result already a member of a X/Y/Z/W class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;

  return AluAny;
}

int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
    return IDFetch;

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case AMDGPU::PRED_X:
  case AMDGPU::COPY:
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT_4:
    return IDAlu;
  default:
    return IDOther;
  }
}

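// Pop the most recently released unit in Q that can still be bundled with
// the instructions already picked for the current group.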
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
  if (Q.empty())
    return NULL;
  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
      It != E; ++It) {
    SUnit *SU = *It;
    // Tentatively add the instruction to the group and keep it only if the
    // group can still be bundled.
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->canBundle(InstructionsGroupCandidate)) {
      InstructionsGroupCandidate.pop_back();
      Q.erase((It + 1).base());
      return SU;
    } else {
      InstructionsGroupCandidate.pop_back();
    }
  }
  return NULL;
}

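// Sort pending ALU units into the per-AluKind available queues.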
void R600SchedStrategy::LoadAlu() {
  std::vector<SUnit *> &QSrc = Pending[IDAlu];
  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
    AluKind AK = getAluKind(QSrc[i]);
    AvailableAlus[AK].push_back(QSrc[i]);
  }
  QSrc.clear();
}

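// Close the current instruction group and start filling a new one.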
void R600SchedStrategy::PrepareNextSlot() {
  DEBUG(dbgs() << "New Slot\n");
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}

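// Constrain the regclass of MI's destination register so the register
// allocator assigns it to the channel matching Slot.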
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  unsigned DestReg = MI->getOperand(0).getReg();
  // PressureRegister crashes if an operand is def and used in the same inst
  // and we try to constrain its regclass
  for (MachineInstr::mop_iterator It = MI->operands_begin(),
      E = MI->operands_end(); It != E; ++It) {
    MachineOperand &MO = *It;
    if (MO.isReg() && !MO.isDef() &&
        MO.getReg() == DestReg)
      return;
  }
  // Constrain the regclass of DestReg to assign it to Slot
  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
    break;
  }
}

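// Try to fill channel Slot, preferring a unit already bound to that channel
// and falling back to an AluAny unit, which is then constrained to the slot.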
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
  if (SlotedSU)
    return SlotedSU;
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
  if (UnslotedSU)
    AssignSlot(UnslotedSU->getInstr(), Slot);
  return UnslotedSU;
}

bool R600SchedStrategy::isAvailablesAluEmpty() const {
  return Pending[IDAlu].empty() && AvailableAlus[AluAny].empty() &&
      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty() &&
      AvailableAlus[AluPredX].empty();
}

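// Build ALU instruction groups bottom-up: whole-group units (PRED_X,
// discarded copies, T_XYZW instructions) reserve all four slots at once;
// otherwise channels are filled one by one from W down to X, and a new
// group is opened when no candidate fits the remaining slots.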
SUnit* R600SchedStrategy::pickAlu() {
  while (!isAvailablesAluEmpty()) {
    if (!OccupedSlotsMask) {
      // Bottom up scheduling: predX must come first
      if (!AvailableAlus[AluPredX].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluPredX]);
      }
      // Flush physical reg copies (RA will discard them)
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluDiscarded]);
      }
      // If there is a T_XYZW alu available, use it
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluT_XYZW]);
      }
    }
    for (int Chan = 3; Chan > -1; --Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return NULL;
}

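// Pick the next unit from the Available queue for QID, refilling the queue
// from Pending when it runs dry.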
SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = 0;
  std::vector<SUnit *> &AQ = Available[QID];

  if (AQ.empty()) {
    MoveUnits(Pending[QID], AQ);
  }
  if (!AQ.empty()) {
    SU = AQ.back();
    AQ.resize(AQ.size() - 1);
  }
  return SU;
}