+bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
+ unsigned TargetFlags = get(Opcode).TSFlags;
+
+ return ((TargetFlags & R600_InstFlag::LDS_1A) |
+ (TargetFlags & R600_InstFlag::LDS_1A1D) |
+ (TargetFlags & R600_InstFlag::LDS_1A2D));
+}
+
+bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
+ return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
+}
+
+bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
+ return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
+}
+
+bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
+ if (isALUInstr(MI->getOpcode()))
+ return true;
+ if (isVector(*MI) || isCubeOp(MI->getOpcode()))
+ return true;
+ switch (MI->getOpcode()) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::COPY:
+ case AMDGPU::DOT_4:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
+ if (ST.hasCaymanISA())
+ return false;
+ return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
+}
+
+bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
+ return isTransOnly(MI->getOpcode());
+}
+
+bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
+ return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
+}
+
+bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
+ return isVectorOnly(MI->getOpcode());
+}
+
+bool R600InstrInfo::isExport(unsigned Opcode) const {
+ return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
+}
+
+bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
+ return ST.hasVertexCache() && IS_VTX(get(Opcode));
+}
+
+bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+ return MFI->getShaderType() != ShaderType::COMPUTE &&
+ usesVertexCache(MI->getOpcode());
+}
+
+bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
+ return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
+}
+
+bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+ return (MFI->getShaderType() == ShaderType::COMPUTE &&
+ usesVertexCache(MI->getOpcode())) ||
+ usesTextureCache(MI->getOpcode());
+}
+
+bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
+ switch (Opcode) {
+ case AMDGPU::KILLGT:
+ case AMDGPU::GROUP_BARRIER:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
+ return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
+ return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
+ if (!isALUInstr(MI->getOpcode())) {
+ return false;
+ }
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ if (!I->isReg() || !I->isUse() ||
+ TargetRegisterInfo::isVirtualRegister(I->getReg()))
+ continue;
+
+ if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
+ return true;
+ }
+ return false;
+}
+
+int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
+ static const unsigned OpTable[] = {
+ AMDGPU::OpName::src0,
+ AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2
+ };
+
+ assert (SrcNum < 3);
+ return getOperandIdx(Opcode, OpTable[SrcNum]);
+}
+
+#define SRC_SEL_ROWS 11
+int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
+ static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
+ {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
+ {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
+ {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
+ {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
+ {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
+ {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
+ {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
+ {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
+ {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
+ {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
+ {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
+ };
+
+ for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
+ if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
+ return getOperandIdx(Opcode, SrcSelTable[i][1]);
+ }
+ }
+ return -1;
+}
+#undef SRC_SEL_ROWS
+
+SmallVector<std::pair<MachineOperand *, int64_t>, 3>
+R600InstrInfo::getSrcs(MachineInstr *MI) const {
+ SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
+
+ if (MI->getOpcode() == AMDGPU::DOT_4) {
+ static const unsigned OpTable[8][2] = {
+ {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
+ {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
+ {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
+ {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
+ {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
+ {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
+ {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
+ {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
+ };
+
+ for (unsigned j = 0; j < 8; j++) {
+ MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
+ OpTable[j][0]));
+ unsigned Reg = MO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
+ OpTable[j][1])).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+ continue;
+ }
+
+ }
+ return Result;
+ }
+
+ static const unsigned OpTable[3][2] = {
+ {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
+ {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
+ {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
+ };
+
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ unsigned Reg = MI->getOperand(SrcIdx).getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ unsigned Sel = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+ continue;
+ }
+ if (Reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned Imm = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
+ continue;
+ }
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
+ }
+ return Result;
+}
+
+std::vector<std::pair<int, unsigned> >
+R600InstrInfo::ExtractSrcs(MachineInstr *MI,
+ const DenseMap<unsigned, unsigned> &PV,
+ unsigned &ConstCount) const {
+ ConstCount = 0;
+ const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
+ const std::pair<int, unsigned> DummyPair(-1, 0);
+ std::vector<std::pair<int, unsigned> > Result;
+ unsigned i = 0;
+ for (unsigned n = Srcs.size(); i < n; ++i) {
+ unsigned Reg = Srcs[i].first->getReg();
+ unsigned Index = RI.getEncodingValue(Reg) & 0xff;
+ if (Reg == AMDGPU::OQAP) {
+ Result.push_back(std::pair<int, unsigned>(Index, 0));
+ }
+ if (PV.find(Reg) != PV.end()) {
+ // 255 is used to tells its a PS/PV reg
+ Result.push_back(std::pair<int, unsigned>(255, 0));
+ continue;
+ }
+ if (Index > 127) {
+ ConstCount++;
+ Result.push_back(DummyPair);
+ continue;
+ }
+ unsigned Chan = RI.getHWRegChan(Reg);
+ Result.push_back(std::pair<int, unsigned>(Index, Chan));
+ }
+ for (; i < 3; ++i)
+ Result.push_back(DummyPair);
+ return Result;
+}
+
+static std::vector<std::pair<int, unsigned> >
+Swizzle(std::vector<std::pair<int, unsigned> > Src,
+ R600InstrInfo::BankSwizzle Swz) {
+ if (Src[0] == Src[1])
+ Src[1].first = -1;
+ switch (Swz) {
+ case R600InstrInfo::ALU_VEC_012_SCL_210:
+ break;
+ case R600InstrInfo::ALU_VEC_021_SCL_122:
+ std::swap(Src[1], Src[2]);
+ break;
+ case R600InstrInfo::ALU_VEC_102_SCL_221:
+ std::swap(Src[0], Src[1]);
+ break;
+ case R600InstrInfo::ALU_VEC_120_SCL_212:
+ std::swap(Src[0], Src[1]);
+ std::swap(Src[0], Src[2]);
+ break;
+ case R600InstrInfo::ALU_VEC_201:
+ std::swap(Src[0], Src[2]);
+ std::swap(Src[0], Src[1]);
+ break;
+ case R600InstrInfo::ALU_VEC_210:
+ std::swap(Src[0], Src[2]);
+ break;
+ }
+ return Src;
+}
+
+static unsigned
+getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
+ switch (Swz) {
+ case R600InstrInfo::ALU_VEC_012_SCL_210: {
+ unsigned Cycles[3] = { 2, 1, 0};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_021_SCL_122: {
+ unsigned Cycles[3] = { 1, 2, 2};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_120_SCL_212: {
+ unsigned Cycles[3] = { 2, 1, 2};
+ return Cycles[Op];
+ }
+ case R600InstrInfo::ALU_VEC_102_SCL_221: {
+ unsigned Cycles[3] = { 2, 2, 1};
+ return Cycles[Op];
+ }
+ default:
+ llvm_unreachable("Wrong Swizzle for Trans Slot");
+ return 0;
+ }
+}
+
+/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
+/// in the same Instruction Group while meeting read port limitations given a
+/// Swz swizzle sequence.
+unsigned R600InstrInfo::isLegalUpTo(
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ const std::vector<R600InstrInfo::BankSwizzle> &Swz,
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ R600InstrInfo::BankSwizzle TransSwz) const {
+ int Vector[4][3];
+ memset(Vector, -1, sizeof(Vector));
+ for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
+ const std::vector<std::pair<int, unsigned> > &Srcs =
+ Swizzle(IGSrcs[i], Swz[i]);
+ for (unsigned j = 0; j < 3; j++) {
+ const std::pair<int, unsigned> &Src = Srcs[j];
+ if (Src.first < 0 || Src.first == 255)
+ continue;
+ if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
+ if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
+ Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
+ // The value from output queue A (denoted by register OQAP) can
+ // only be fetched during the first cycle.
+ return false;
+ }
+ // OQAP does not count towards the normal read port restrictions
+ continue;
+ }
+ if (Vector[Src.second][j] < 0)
+ Vector[Src.second][j] = Src.first;
+ if (Vector[Src.second][j] != Src.first)
+ return i;
+ }
+ }
+ // Now check Trans Alu
+ for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
+ const std::pair<int, unsigned> &Src = TransSrcs[i];
+ unsigned Cycle = getTransSwizzle(TransSwz, i);
+ if (Src.first < 0)
+ continue;
+ if (Src.first == 255)
+ continue;
+ if (Vector[Src.second][Cycle] < 0)
+ Vector[Src.second][Cycle] = Src.first;
+ if (Vector[Src.second][Cycle] != Src.first)
+ return IGSrcs.size() - 1;
+ }
+ return IGSrcs.size();
+}
+
+/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
+/// (in lexicographic term) swizzle sequence assuming that all swizzles after
+/// Idx can be skipped
+static bool
+NextPossibleSolution(
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+ unsigned Idx) {
+ assert(Idx < SwzCandidate.size());
+ int ResetIdx = Idx;
+ while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
+ ResetIdx --;
+ for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
+ SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
+ }
+ if (ResetIdx == -1)
+ return false;
+ int NextSwizzle = SwzCandidate[ResetIdx] + 1;
+ SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
+ return true;
+}
+
+/// Enumerate all possible Swizzle sequence to find one that can meet all
+/// read port requirements.
+bool R600InstrInfo::FindSwizzleForVectorSlot(
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+ const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ R600InstrInfo::BankSwizzle TransSwz) const {
+ unsigned ValidUpTo = 0;
+ do {
+ ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
+ if (ValidUpTo == IGSrcs.size())
+ return true;
+ } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
+ return false;
+}
+
+/// Instructions in Trans slot can't read gpr at cycle 0 if they also read
+/// a const, and can't read a gpr at cycle 1 if they read 2 const.
+static bool
+isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
+ const std::vector<std::pair<int, unsigned> > &TransOps,
+ unsigned ConstCount) {
+ // TransALU can't read 3 constants
+ if (ConstCount > 2)
+ return false;
+ for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
+ const std::pair<int, unsigned> &Src = TransOps[i];
+ unsigned Cycle = getTransSwizzle(TransSwz, i);
+ if (Src.first < 0)
+ continue;
+ if (ConstCount > 0 && Cycle == 0)
+ return false;
+ if (ConstCount > 1 && Cycle == 1)
+ return false;
+ }
+ return true;
+}
+
+bool
+R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
+ const DenseMap<unsigned, unsigned> &PV,
+ std::vector<BankSwizzle> &ValidSwizzle,
+ bool isLastAluTrans)
+ const {
+ //Todo : support shared src0 - src1 operand
+
+ std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
+ ValidSwizzle.clear();
+ unsigned ConstCount;
+ BankSwizzle TransBS = ALU_VEC_012_SCL_210;
+ for (unsigned i = 0, e = IG.size(); i < e; ++i) {
+ IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
+ unsigned Op = getOperandIdx(IG[i]->getOpcode(),
+ AMDGPU::OpName::bank_swizzle);
+ ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
+ IG[i]->getOperand(Op).getImm());
+ }
+ std::vector<std::pair<int, unsigned> > TransOps;
+ if (!isLastAluTrans)
+ return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
+
+ TransOps = std::move(IGSrcs.back());
+ IGSrcs.pop_back();
+ ValidSwizzle.pop_back();
+
+ static const R600InstrInfo::BankSwizzle TransSwz[] = {
+ ALU_VEC_012_SCL_210,
+ ALU_VEC_021_SCL_122,
+ ALU_VEC_120_SCL_212,
+ ALU_VEC_102_SCL_221
+ };
+ for (unsigned i = 0; i < 4; i++) {
+ TransBS = TransSwz[i];
+ if (!isConstCompatible(TransBS, TransOps, ConstCount))
+ continue;
+ bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
+ TransBS);
+ if (Result) {
+ ValidSwizzle.push_back(TransBS);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
+ const {
+ assert (Consts.size() <= 12 && "Too many operands in instructions group");
+ unsigned Pair1 = 0, Pair2 = 0;
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned ReadConstHalf = Consts[i] & 2;
+ unsigned ReadConstIndex = Consts[i] & (~3);
+ unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
+ if (!Pair1) {
+ Pair1 = ReadHalfConst;
+ continue;
+ }
+ if (Pair1 == ReadHalfConst)
+ continue;
+ if (!Pair2) {
+ Pair2 = ReadHalfConst;
+ continue;
+ }
+ if (Pair2 != ReadHalfConst)
+ return false;
+ }
+ return true;
+}
+
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
+ const {
+ std::vector<unsigned> Consts;
+ SmallSet<int64_t, 4> Literals;
+ for (unsigned i = 0, n = MIs.size(); i < n; i++) {
+ MachineInstr *MI = MIs[i];
+ if (!isALUInstr(MI->getOpcode()))
+ continue;
+
+ const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
+ getSrcs(MI);
+
+ for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
+ std::pair<MachineOperand *, unsigned> Src = Srcs[j];
+ if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
+ Literals.insert(Src.second);
+ if (Literals.size() > 4)
+ return false;
+ if (Src.first->getReg() == AMDGPU::ALU_CONST)
+ Consts.push_back(Src.second);
+ if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
+ AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
+ unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
+ unsigned Chan = RI.getHWRegChan(Src.first->getReg());
+ Consts.push_back((Index << 2) | Chan);
+ }
+ }
+ }
+ return fitsConstReadLimitations(Consts);
+}
+
+DFAPacketizer *
+R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
+ const InstrItineraryData *II = STI.getInstrItineraryData();
+ return static_cast<const AMDGPUSubtarget &>(STI).createDFAPacketizer(II);