+} // End usesCustomInserter = 1
+
+// Common base of the Evergreen VTX_READ_* instruction classes below.
+//   name      - instruction/operand string, forwarded to VTX_READ
+//   buffer_id - forwarded to VTX_READ and also stored in BUFFER_ID
+//   outs      - output operand dag, forwarded to VTX_READ
+//   pattern   - selection pattern list, forwarded to VTX_READ
+// The fetch size, destination selects and data format are not set here; the
+// size-specific subclasses provide them.
+class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
+ : VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {
+
+ // Per-instantiation field
+ let BUFFER_ID = buffer_id;
+
+ // Fields that are identical for every class derived from this one
+ let VC_INST = 0;
+ let FETCH_TYPE = 2;
+ let FETCH_WHOLE_QUAD = 0;
+ let SRC_REL = 0;
+
+ // XXX: This select could be derived from SRC_GPR instead of being fixed,
+ // which would let vertex addresses live in any channel rather than only X.
+ let SRC_SEL_X = 0;
+
+ // Low 32 bits of the encoding are taken from Word0 (presumably assembled by
+ // VTX_WORD0_eg from the fields above -- confirm against its definition).
+ let Inst{31-0} = Word0;
+}
+
+// 8-bit VTX read. The result is a single R600_TReg32_X register; only the X
+// destination select is live, Y/Z/W are masked.
+//   buffer_id - forwarded to VTX_READ_eg
+//   pattern   - selection pattern list, forwarded to VTX_READ_eg
+class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
+ (outs R600_TReg32_X:$dst_gpr), pattern> {
+
+ // Access size (MEGA_FETCH_COUNT is presumably a byte count -- confirm)
+ let DATA_FORMAT = 1; // FMT_8
+ let MEGA_FETCH_COUNT = 1;
+
+ // Destination channel selects
+ let DST_SEL_W = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_X = 0;
+}
+
+// 16-bit VTX read. The result is a single R600_TReg32_X register; only the X
+// destination select is live, Y/Z/W are masked.
+//   buffer_id - forwarded to VTX_READ_eg
+//   pattern   - selection pattern list, forwarded to VTX_READ_eg
+class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
+ (outs R600_TReg32_X:$dst_gpr), pattern> {
+
+ // Access size (MEGA_FETCH_COUNT is presumably a byte count -- confirm)
+ let DATA_FORMAT = 5; // FMT_16
+ let MEGA_FETCH_COUNT = 2;
+
+ // Destination channel selects
+ let DST_SEL_W = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_X = 0;
+}
+
+// 32-bit VTX read. The result is a single R600_TReg32_X register; only the X
+// destination select is live, Y/Z/W are masked. The destination is also tied
+// to the address operand (see Constraints below).
+//   buffer_id - forwarded to VTX_READ_eg
+//   pattern   - selection pattern list, forwarded to VTX_READ_eg
+class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
+ (outs R600_TReg32_X:$dst_gpr), pattern> {
+
+ // Tie $dst_gpr to the address register. This is not strictly required, but
+ // some GPU hangs appeared to be caused by ALU instructions in the following
+ // instruction group writing to the $src_gpr register of the VTX_READ, e.g.
+ //   %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
+ //   %T2_X<def> = MOV %ZERO
+ // With the operands tied, that sequence can no longer be produced.
+ let Constraints = "$src_gpr.ptr = $dst_gpr";
+
+ // Access size (MEGA_FETCH_COUNT is presumably a byte count -- confirm)
+ let DATA_FORMAT = 0xD; // COLOR_32
+ let MEGA_FETCH_COUNT = 4;
+
+ // Destination channel selects
+ let DST_SEL_W = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_Y = 7; // Masked
+ let DST_SEL_X = 0;
+}
+
+// 64-bit VTX read. The result is an R600_Reg64 register printed with an .XY
+// suffix; the X and Y destination selects are live, Z/W are masked.
+//   buffer_id - forwarded to VTX_READ_eg
+//   pattern   - selection pattern list, forwarded to VTX_READ_eg
+class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id,
+ (outs R600_Reg64:$dst_gpr), pattern> {
+
+ // Access size (MEGA_FETCH_COUNT is presumably a byte count -- confirm)
+ let DATA_FORMAT = 0x1D; // COLOR_32_32
+ let MEGA_FETCH_COUNT = 8;
+
+ // Destination channel selects
+ let DST_SEL_W = 7; // Masked
+ let DST_SEL_Z = 7; // Masked
+ let DST_SEL_Y = 1;
+ let DST_SEL_X = 0;
+}
+
+// 128-bit VTX read. The result is an R600_Reg128 register printed with an
+// .XYZW suffix; all four destination selects are live.
+//   buffer_id - forwarded to VTX_READ_eg
+//   pattern   - selection pattern list, forwarded to VTX_READ_eg
+class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
+ : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
+ (outs R600_Reg128:$dst_gpr), pattern> {
+
+ // Access size (MEGA_FETCH_COUNT is presumably a byte count -- confirm)
+ let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
+ let MEGA_FETCH_COUNT = 16;
+
+ // Destination channel selects
+ let DST_SEL_W = 3;
+ let DST_SEL_Z = 2;
+ let DST_SEL_Y = 1;
+ let DST_SEL_X = 0;
+
+ // XXX: VTX_READ_128 should be forced to write to the register that holds
+ // its buffer address, to avoid potential hangs. The constraint used by
+ // VTX_READ_32_eg cannot be reused here because the $src_gpr.ptr and
+ // $dst_gpr registers are different sizes.
+}
+
+//===----------------------------------------------------------------------===//
+// VTX Read from parameter memory space
+//===----------------------------------------------------------------------===//
+
+// 8-bit parameter read: matches load_param_exti8 producing an i32.
+// Buffer id 0 is the one passed by every VTX_READ_PARAM_* definition here.
+def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
+ [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
+>;
+
+// 16-bit parameter read: matches load_param_exti16 producing an i32.
+// Buffer id 0 is the one passed by every VTX_READ_PARAM_* definition here.
+def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
+ [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
+>;
+
+// 32-bit parameter read: matches load_param producing an i32.
+// Buffer id 0 is the one passed by every VTX_READ_PARAM_* definition here.
+def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
+ [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
+>;
+
+// 64-bit parameter read: matches load_param producing a v2i32.
+// Buffer id 0 is the one passed by every VTX_READ_PARAM_* definition here.
+def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0,
+ [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
+>;
+
+// 128-bit parameter read: matches load_param producing a v4i32.
+// Buffer id 0 is the one passed by every VTX_READ_PARAM_* definition here.
+def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
+ [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
+>;
+
+//===----------------------------------------------------------------------===//
+// VTX Read from global memory space
+//===----------------------------------------------------------------------===//
+
+// 8-bit reads
+// Matches az_extloadi8_global producing an i32 (presumably an any-/zero-
+// extending byte load from global memory -- confirm against the PatFrag).
+// Buffer id 1 is the one passed by every VTX_READ_GLOBAL_* definition here.
+def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
+ [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))]
+>;
+
+// 16-bit reads
+// Matches az_extloadi16_global producing an i32 (presumably an any-/zero-
+// extending 16-bit load from global memory -- confirm against the PatFrag).
+// Buffer id 1 is the one passed by every VTX_READ_GLOBAL_* definition here.
+def VTX_READ_GLOBAL_16_eg : VTX_READ_16_eg <1,
+ [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))]
+>;
+
+// 32-bit reads
+// Matches global_load producing an i32.
+// Buffer id 1 is the one passed by every VTX_READ_GLOBAL_* definition here.
+def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
+ [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
+>;
+
+// 64-bit reads
+// Matches global_load producing a v2i32.
+// Buffer id 1 is the one passed by every VTX_READ_GLOBAL_* definition here.
+def VTX_READ_GLOBAL_64_eg : VTX_READ_64_eg <1,
+ [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
+>;
+
+// 128-bit reads
+// Matches global_load producing a v4i32.
+// Buffer id 1 is the one passed by every VTX_READ_GLOBAL_* definition here.
+def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
+ [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
+>;
+
+} // End Predicates = [isEG]
+
+//===----------------------------------------------------------------------===//
+// Evergreen / Cayman Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEGorCayman] in {
+
+ // BFE_UINT - bit_extract, an optimization for mask and shift
+ // Src0 = Input
+ // Src1 = Offset (index of the lowest bit extracted, see examples)
+ // Src2 = Width (number of bits extracted, see examples)
+ //
+ // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width)
+ //
+ // The mask forms on the right of each example are equivalent only when the
+ // right shift is logical (zero-filling).
+ //
+ // Example Usage:
+ // (Offset, Width)
+ //
+ // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0
+ // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8
+ // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
+ // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
+ //
+ // The inline pattern selects the int_AMDIL_bit_extract_u32 intrinsic; the
+ // anonymous BFEPattern record that follows is instantiated with this
+ // instruction (its contents are defined outside this section).
+ def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
+ [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1,
+ i32:$src2))],
+ VecALU
+ >;
+ def : BFEPattern <BFE_UINT_eg>;
+
+ // BFI_INT has no inline selection pattern (empty list); its patterns come
+ // from instantiating the BFIPatterns multiclass with this instruction.
+ def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
+ defm : BFIPatterns <BFI_INT_eg>;
+
+ // MULADD_UINT24: selected for (src0 * src1) + src2 where both multiplicands
+ // match U24 (presumably values known to fit in 24 unsigned bits -- confirm
+ // against the U24 definition); the addend is a plain i32.
+ def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
+ [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU
+ >;
+ // BIT_ALIGN_INT has no inline selection pattern (empty list); it is used
+ // through the anonymous ROTRPattern record instantiated with it below.
+ def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
+ def : ROTRPattern <BIT_ALIGN_INT_eg>;
+
+ // Instructions built from shared *_Common classes. Each passes a single
+ // numeric argument (presumably the Evergreen/Cayman opcode -- confirm
+ // against the *_Common class definitions, which are outside this section).
+ def MULADD_eg : MULADD_Common<0x14>;
+ def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
+ def ASHR_eg : ASHR_Common<0x15>;
+ def LSHR_eg : LSHR_Common<0x16>;
+ def LSHL_eg : LSHL_Common<0x17>;
+ def CNDE_eg : CNDE_Common<0x19>;
+ def CNDGT_eg : CNDGT_Common<0x1A>;
+ def CNDGE_eg : CNDGE_Common<0x1B>;
+ def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
+ def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
+ // MUL_UINT24: selected for src0 * src1 where both operands match U24
+ // (presumably values known to fit in 24 unsigned bits -- confirm against
+ // the U24 definition).
+ def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
+ [(set i32:$dst, (mul U24:$src0, U24:$src1))], VecALU
+ >;
+ // DOT4 built from the shared DOT4_Common class; 0xBE is presumably the
+ // opcode (confirm against DOT4_Common).
+ def DOT4_eg : DOT4_Common<0xBE>;