bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
switch(Opcode) {
default: return false;
- case AMDGPU::DOT4_r600_pseudo:
- case AMDGPU::DOT4_eg_pseudo:
- return true;
}
}
usesTextureCache(MI->getOpcode());
}
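+// Collect the source operands of MI together with an immediate value for
+// each: the SRC*_SEL immediate for ALU_CONST sources, the literal immediate
+// for ALU_LITERAL_X sources, and 0 for ordinary register sources.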
+SmallVector<std::pair<MachineOperand *, int64_t>, 3>
+R600InstrInfo::getSrcs(MachineInstr *MI) const {
+ SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
+
+ static const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ unsigned Reg = MO.getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
+ unsigned Sel = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
+ continue;
+ }
+ if (Reg == AMDGPU::ALU_LITERAL_X) {
+ unsigned Imm = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), R600Operands::IMM)).getImm();
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
+ continue;
+ }
+ Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
+ }
+ return Result;
+}
+
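+// Flatten the sources of MI into (register index, channel) pairs for the
+// bank swizzle check. Sources whose encoded index is above 127 or that are
+// present in the PV map (presumably reachable through the PV/PS bypass) are
+// replaced by a (-1, 0) dummy entry, and the result is padded to 3 entries.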
+std::vector<std::pair<int, unsigned> >
+R600InstrInfo::ExtractSrcs(MachineInstr *MI,
+ const DenseMap<unsigned, unsigned> &PV)
+ const {
+ const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
+ const std::pair<int, unsigned> DummyPair(-1, 0);
+ std::vector<std::pair<int, unsigned> > Result;
+ unsigned i = 0;
+ for (unsigned n = Srcs.size(); i < n; ++i) {
+ unsigned Reg = Srcs[i].first->getReg();
+ unsigned Index = RI.getEncodingValue(Reg) & 0xff;
+ unsigned Chan = RI.getHWRegChan(Reg);
+ if (Index > 127) {
+ Result.push_back(DummyPair);
+ continue;
+ }
+ if (PV.find(Index) != PV.end()) {
+ Result.push_back(DummyPair);
+ continue;
+ }
+ Result.push_back(std::pair<int, unsigned>(Index, Chan));
+ }
+ for (; i < 3; ++i)
+ Result.push_back(DummyPair);
+ return Result;
+}
+
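+// Return a copy of Src with its three entries permuted according to the
+// requested bank swizzle.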
+static std::vector<std::pair<int, unsigned> >
+Swizzle(std::vector<std::pair<int, unsigned> > Src,
+ R600InstrInfo::BankSwizzle Swz) {
+ switch (Swz) {
+ case R600InstrInfo::ALU_VEC_012:
+ break;
+ case R600InstrInfo::ALU_VEC_021:
+ std::swap(Src[1], Src[2]);
+ break;
+ case R600InstrInfo::ALU_VEC_102:
+ std::swap(Src[0], Src[1]);
+ break;
+ case R600InstrInfo::ALU_VEC_120:
+ std::swap(Src[0], Src[1]);
+ std::swap(Src[0], Src[2]);
+ break;
+ case R600InstrInfo::ALU_VEC_201:
+ std::swap(Src[0], Src[2]);
+ std::swap(Src[0], Src[1]);
+ break;
+ case R600InstrInfo::ALU_VEC_210:
+ std::swap(Src[0], Src[2]);
+ break;
+ }
+ return Src;
+}
+
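+// Check the first CheckedSize instructions of the group under the given
+// swizzle assignment: for every (channel, source position) pair at most one
+// distinct register index may be read, otherwise the selection is rejected.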
+static bool
+isLegal(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ const std::vector<R600InstrInfo::BankSwizzle> &Swz,
+ unsigned CheckedSize) {
+ int Vector[4][3];
+ memset(Vector, -1, sizeof(Vector));
+ for (unsigned i = 0; i < CheckedSize; i++) {
+ const std::vector<std::pair<int, unsigned> > &Srcs =
+ Swizzle(IGSrcs[i], Swz[i]);
+ for (unsigned j = 0; j < 3; j++) {
+ const std::pair<int, unsigned> &Src = Srcs[j];
+ if (Src.first < 0)
+ continue;
+ if (Vector[Src.second][j] < 0)
+ Vector[Src.second][j] = Src.first;
+ if (Vector[Src.second][j] != Src.first)
+ return false;
+ }
+ }
+ return true;
+}
+
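+// Depth-first search over bank swizzle candidates: try every swizzle value
+// for the instruction at Depth, starting from the value already present in
+// SwzCandidate, and recurse as long as the prefix remains legal. The slot is
+// reset to ALU_VEC_012 before backtracking.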
+static bool recursiveFitsFPLimitation(
+ const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+ unsigned Depth = 0) {
+ if (!isLegal(IGSrcs, SwzCandidate, Depth))
+ return false;
+ if (IGSrcs.size() == Depth)
+ return true;
+ for (unsigned i = SwzCandidate[Depth]; i < 6; i++) {
+ SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
+ if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
+ return true;
+ }
+ SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
+ return false;
+}
+
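+// Search for a bank swizzle assignment that lets the whole instruction group
+// respect the read port limitation. The per-instruction sources are
+// extracted first, the search is seeded with the swizzles already encoded in
+// the BANK_SWIZZLE operands, and the chosen values are returned through
+// ValidSwizzle.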
+bool
+R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
+ const DenseMap<unsigned, unsigned> &PV,
+ std::vector<BankSwizzle> &ValidSwizzle)
+ const {
+ // TODO: support shared src0 - src1 operand
+
+ std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
+ ValidSwizzle.clear();
+ for (unsigned i = 0, e = IG.size(); i < e; ++i) {
+ IGSrcs.push_back(ExtractSrcs(IG[i], PV));
+ unsigned Op = getOperandIdx(IG[i]->getOpcode(),
+ R600Operands::BANK_SWIZZLE);
+ ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
+ IG[i]->getOperand(Op).getImm());
+ }
+ return recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
+}
+
+
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
const {
R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
std::vector<unsigned> Consts;
for (unsigned i = 0, n = MIs.size(); i < n; i++) {
- const MachineInstr *MI = MIs[i];
-
- const R600Operands::Ops OpTable[3][2] = {
- {R600Operands::SRC0, R600Operands::SRC0_SEL},
- {R600Operands::SRC1, R600Operands::SRC1_SEL},
- {R600Operands::SRC2, R600Operands::SRC2_SEL},
- };
-
+ MachineInstr *MI = MIs[i];
if (!isALUInstr(MI->getOpcode()))
continue;
- for (unsigned j = 0; j < 3; j++) {
- int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
- if (SrcIdx < 0)
- break;
- unsigned Reg = MI->getOperand(SrcIdx).getReg();
- if (Reg == AMDGPU::ALU_CONST) {
- unsigned Const = MI->getOperand(
- getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
- Consts.push_back(Const);
- continue;
- }
- if (AMDGPU::R600_KC0RegClass.contains(Reg) ||
- AMDGPU::R600_KC1RegClass.contains(Reg)) {
- unsigned Index = RI.getEncodingValue(Reg) & 0xff;
- unsigned Chan = RI.getHWRegChan(Reg);
+ const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Srcs =
+ getSrcs(MI);
+
+ for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
+ const std::pair<MachineOperand *, int64_t> &Src = Srcs[j];
+ if (Src.first->getReg() == AMDGPU::ALU_CONST) {
+ Consts.push_back(Src.second);
+ continue;
+ }
+ if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
+ AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
+ unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
+ unsigned Chan = RI.getHWRegChan(Src.first->getReg());
Consts.push_back((Index << 2) | Chan);
- continue;
}
}
}
return MIB;
}
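+// Map a scalar ALU operand to its per-slot (X/Y/Z/W) counterpart in the
+// DOT_4 vector pseudo instruction.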
+#define OPERAND_CASE(Label) \
+ case Label: { \
+ static const R600Operands::VecOps Ops[] = \
+ { \
+ Label##_X, \
+ Label##_Y, \
+ Label##_Z, \
+ Label##_W \
+ }; \
+ return Ops[Slot]; \
+ }
+
+static R600Operands::VecOps
+getSlotedOps(R600Operands::Ops Op, unsigned Slot) {
+ switch (Op) {
+ OPERAND_CASE(R600Operands::UPDATE_EXEC_MASK)
+ OPERAND_CASE(R600Operands::UPDATE_PREDICATE)
+ OPERAND_CASE(R600Operands::WRITE)
+ OPERAND_CASE(R600Operands::OMOD)
+ OPERAND_CASE(R600Operands::DST_REL)
+ OPERAND_CASE(R600Operands::CLAMP)
+ OPERAND_CASE(R600Operands::SRC0)
+ OPERAND_CASE(R600Operands::SRC0_NEG)
+ OPERAND_CASE(R600Operands::SRC0_REL)
+ OPERAND_CASE(R600Operands::SRC0_ABS)
+ OPERAND_CASE(R600Operands::SRC0_SEL)
+ OPERAND_CASE(R600Operands::SRC1)
+ OPERAND_CASE(R600Operands::SRC1_NEG)
+ OPERAND_CASE(R600Operands::SRC1_REL)
+ OPERAND_CASE(R600Operands::SRC1_ABS)
+ OPERAND_CASE(R600Operands::SRC1_SEL)
+ OPERAND_CASE(R600Operands::PRED_SEL)
+ default:
+ llvm_unreachable("Wrong Operand");
+ }
+}
+
+#undef OPERAND_CASE
+
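+// The vector pseudo lists its VecOps operands right after the destination,
+// so the machine operand index is the enum value offset by one (assuming a
+// single explicit destination operand).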
+static int
+getVecOperandIdx(R600Operands::VecOps Op) {
+ return 1 + Op;
+}
+
+
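+// Expand one slot (X/Y/Z/W) of a DOT_4 pseudo into a standalone
+// DOT4_r600/DOT4_eg instruction writing DstReg, copying every per-slot
+// modifier (write, omod, clamp, neg, abs, rel, sel, ...) from the vector
+// pseudo onto the new instruction.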
+MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
+ MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
+ const {
+ assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
+ unsigned Opcode;
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
+ Opcode = AMDGPU::DOT4_r600;
+ else
+ Opcode = AMDGPU::DOT4_eg;
+ MachineBasicBlock::iterator I = MI;
+ MachineOperand &Src0 = MI->getOperand(
+ getVecOperandIdx(getSlotedOps(R600Operands::SRC0, Slot)));
+ MachineOperand &Src1 = MI->getOperand(
+ getVecOperandIdx(getSlotedOps(R600Operands::SRC1, Slot)));
+ MachineInstr *MIB = buildDefaultInstruction(
+ MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
+ static const R600Operands::Ops Operands[14] = {
+ R600Operands::UPDATE_EXEC_MASK,
+ R600Operands::UPDATE_PREDICATE,
+ R600Operands::WRITE,
+ R600Operands::OMOD,
+ R600Operands::DST_REL,
+ R600Operands::CLAMP,
+ R600Operands::SRC0_NEG,
+ R600Operands::SRC0_REL,
+ R600Operands::SRC0_ABS,
+ R600Operands::SRC0_SEL,
+ R600Operands::SRC1_NEG,
+ R600Operands::SRC1_REL,
+ R600Operands::SRC1_ABS,
+ R600Operands::SRC1_SEL,
+ };
+
+ for (unsigned i = 0; i < 14; i++) {
+ MachineOperand &MO = MI->getOperand(
+ getVecOperandIdx(getSlotedOps(Operands[i], Slot)));
+ assert (MO.isImm());
+ setImmOperand(MIB, Operands[i], MO.getImm());
+ }
+ MIB->getOperand(20).setImm(0);
+ return MIB;
+}
+
MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
unsigned DstReg,