X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FR600Instructions.td;h=a883c839e6200141bea42f1be9d6ba6315552ab3;hb=12af22e8cc217827cf4f118b0f5e4ebbda9925ae;hp=329b32788e6504e84e47b8f8ced723f8c40d124a;hpb=dfef7cbfc6a96d129b99750f554c7dbc000d3228;p=oota-llvm.git diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 329b32788e6..a883c839e62 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// R600 Tablegen instruction definitions +// TableGen definitions for instructions which are available on R600 family +// GPUs. // //===----------------------------------------------------------------------===// @@ -75,7 +76,6 @@ def ADDRDWord : ComplexPattern; def ADDRVTX_READ : ComplexPattern; def ADDRGA_CONST_OFFSET : ComplexPattern; def ADDRGA_VAR_OFFSET : ComplexPattern; -def ADDRIndirect : ComplexPattern; def R600_Pred : PredicateOperand inst, string opName, list pattern, class R600_1OP_Helper inst, string opName, SDPatternOperator node, InstrItinClass itin = AnyALU> : R600_1OP ; // If you add or change the operands for R600_2OP instructions, you must @@ -161,10 +161,10 @@ class R600_2OP inst, string opName, list pattern, } class R600_2OP_Helper inst, string opName, SDPatternOperator node, - InstrItinClass itim = AnyALU> : + InstrItinClass itin = AnyALU> : R600_2OP ; // If you add our change the operands for R600_3OP instructions, you must @@ -216,7 +216,7 @@ class R600_REDUCTION inst, dag ins, string asm, list pattern, def TEX_SHADOW : PatLeaf< (imm), [{uint32_t TType = (uint32_t)N->getZExtValue(); - return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13); + return (TType >= 6 && TType <= 8) || TType == 13; }] >; @@ -336,17 +336,6 @@ def load_param_exti8 : LoadParamFrag; def load_param_exti16 : LoadParamFrag; def isR600 : Predicate<"Subtarget.getGeneration() <= AMDGPUSubtarget::R700">; -def isR700 : Predicate<"Subtarget.getGeneration() == AMDGPUSubtarget::R700">; -def isEG : Predicate< - "Subtarget.getGeneration() >= AMDGPUSubtarget::EVERGREEN && " - "Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && " - "!Subtarget.hasCaymanISA()">; - -def isCayman : Predicate<"Subtarget.hasCaymanISA()">; -def isEGorCayman : Predicate<"Subtarget.getGeneration() == " - "AMDGPUSubtarget::EVERGREEN" - "|| Subtarget.getGeneration() ==" - "AMDGPUSubtarget::NORTHERN_ISLANDS">; def isR600toCayman : Predicate< "Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">; @@ -418,7 +407,7 @@ def INTERP_VEC_LOAD : AMDGPUShaderInst < (outs R600_Reg128:$dst), (ins i32imm:$src0), "INTERP_LOAD $src0 : $dst", - []>; + [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>; def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; @@ -643,6 +632,9 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { def CF_ALU : ALU_CLAUSE<8, "ALU">; def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">; def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">; +def CF_ALU_CONTINUE : ALU_CLAUSE<13, "ALU_CONTINUE">; +def CF_ALU_BREAK : ALU_CLAUSE<14, "ALU_BREAK">; +def CF_ALU_ELSE_AFTER : ALU_CLAUSE<15, "ALU_ELSE_AFTER">; def FETCH_CLAUSE : AMDGPUInst <(outs), (ins i32imm:$addr), "Fetch clause starting at $addr:", [] > { @@ -729,7 +721,7 @@ def SETNE_DX10 : R600_2OP < >; def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; -def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>; +def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>; def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>; def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; @@ -823,12 +815,12 @@ def CNDE_INT : R600_3OP < def CNDGE_INT : R600_3OP < 0x1E, "CNDGE_INT", - [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGE))] >; def CNDGT_INT : R600_3OP < 0x1D, "CNDGT_INT", - [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGT))] >; //===----------------------------------------------------------------------===// @@ -881,6 +873,9 @@ def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">; def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">; def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">; def TEX_LD : R600_TEX <0x03, "TEX_LD">; +def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> { + let INST_MOD = 1; +} def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">; def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">; def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">; @@ -899,6 +894,7 @@ defm : TexPattern<6, TEX_LD, v4i32>; defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>; defm : TexPattern<8, TEX_GET_GRADIENTS_H>; defm : TexPattern<9, TEX_GET_GRADIENTS_V>; +defm : TexPattern<10, TEX_LDPTR, v4i32>; //===----------------------------------------------------------------------===// // Helper classes for common instructions @@ -919,6 +915,11 @@ class MULADD_IEEE_Common inst> : R600_3OP < [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))] >; +class FMA_Common inst> : R600_3OP < + inst, "FMA", + [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))] +>; + class CNDE_Common inst> : R600_3OP < inst, "CNDE", [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))] @@ -1072,7 +1073,7 @@ class RECIP_CLAMPED_Common inst> : R600_1OP < } class RECIP_IEEE_Common inst> : R600_1OP < - inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] + inst, "RECIP_IEEE", [(set f32:$dst, (AMDGPUrcp f32:$src0))] > { let Itinerary = TransALU; } @@ -1083,18 +1084,21 @@ class RECIP_UINT_Common inst> : R600_1OP_Helper < let Itinerary = TransALU; } +// Clamped to maximum. class RECIPSQRT_CLAMPED_Common inst> : R600_1OP_Helper < - inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq + inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamped > { let Itinerary = TransALU; } -class RECIPSQRT_IEEE_Common inst> : R600_1OP < - inst, "RECIPSQRT_IEEE", [] +class RECIPSQRT_IEEE_Common inst> : R600_1OP_Helper < + inst, "RECIPSQRT_IEEE", AMDGPUrsq_legacy > { let Itinerary = TransALU; } +// TODO: There is also RECIPSQRT_FF which clamps to zero. + class SIN_Common inst> : R600_1OP < inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{ let Trig = 1; @@ -1107,10 +1111,15 @@ class COS_Common inst> : R600_1OP < let Itinerary = TransALU; } +def CLAMP_R600 : CLAMP ; +def FABS_R600 : FABS; +def FNEG_R600 : FNEG; + //===----------------------------------------------------------------------===// // Helper patterns for complex intrinsics //===----------------------------------------------------------------------===// +// FIXME: Should be predicated on unsafe fp math. multiclass DIV_Common { def : Pat< (int_AMDGPU_div f32:$src0, f32:$src1), @@ -1121,6 +1130,8 @@ def : Pat< (fdiv f32:$src0, f32:$src1), (MUL_IEEE $src0, (recip_ieee $src1)) >; + +def : RcpPat; } class TGSI_LIT_Z_Common @@ -1129,6 +1140,16 @@ class TGSI_LIT_Z_Common ; +// FROUND pattern +class FROUNDPat : Pat < + (AMDGPUround f32:$x), + (CNDGE $x, + (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x)), + (CNDGT (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x)) + ) +>; + + //===----------------------------------------------------------------------===// // R600 / R700 Instructions //===----------------------------------------------------------------------===// @@ -1170,11 +1191,14 @@ let Predicates = [isR600] in { def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; + defm : RsqPat; + + def : FROUNDPat ; def R600_ExportSwz : ExportSwzInst { let Word1{20-17} = 0; // BURST_COUNT let Word1{21} = eop; - let Word1{22} = 1; // VALID_PIXEL_MODE + let Word1{22} = 0; // VALID_PIXEL_MODE let Word1{30-23} = inst; let Word1{31} = 1; // BARRIER } @@ -1183,7 +1207,7 @@ let Predicates = [isR600] in { def R600_ExportBuf : ExportBufInst { let Word1{20-17} = 0; // BURST_COUNT let Word1{21} = eop; - let Word1{22} = 1; // VALID_PIXEL_MODE + let Word1{22} = 0; // VALID_PIXEL_MODE let Word1{30-23} = inst; let Word1{31} = 1; // BARRIER } @@ -1220,6 +1244,10 @@ let Predicates = [isR600] in { "JUMP @$ADDR POP:$POP_COUNT"> { let CNT = 0; } + def CF_PUSH_ELSE_R600 : CF_CLAUSE_R600<12, (ins i32imm:$ADDR), + "PUSH_ELSE @$ADDR"> { + let CNT = 0; + } def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> { let CNT = 0; @@ -1242,559 +1270,6 @@ let Predicates = [isR600] in { } -//===----------------------------------------------------------------------===// -// R700 Only instructions -//===----------------------------------------------------------------------===// - -let Predicates = [isR700] in { - def SIN_r700 : SIN_Common<0x6E>; - def COS_r700 : COS_Common<0x6F>; -} - -//===----------------------------------------------------------------------===// -// Evergreen / Cayman store instructions -//===----------------------------------------------------------------------===// - -let Predicates = [isEGorCayman] in { - -class CF_MEM_RAT_CACHELESS rat_inst, bits<4> rat_id, bits<4> mask, dag ins, - string name, list pattern> - : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins, - "MEM_RAT_CACHELESS "#name, pattern>; - -class CF_MEM_RAT rat_inst, bits<4> rat_id, dag ins, string name, - list pattern> - : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins, - "MEM_RAT "#name, pattern>; - -def RAT_MSKOR : CF_MEM_RAT <0x11, 0, - (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr), - "MSKOR $rw_gpr.XW, $index_gpr", - [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)] -> { - let eop = 0; -} - -} // End Predicates = [isEGorCayman] - - -//===----------------------------------------------------------------------===// -// Evergreen Only instructions -//===----------------------------------------------------------------------===// - -let Predicates = [isEG] in { - -def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; -defm DIV_eg : DIV_Common; - -def MULLO_INT_eg : MULLO_INT_Common<0x8F>; -def MULHI_INT_eg : MULHI_INT_Common<0x90>; -def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; -def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; -def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; -def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>; -def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; -def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; -def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; -def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; -def SIN_eg : SIN_Common<0x8D>; -def COS_eg : COS_Common<0x8E>; - -def : POW_Common ; -def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; - -//===----------------------------------------------------------------------===// -// Memory read/write instructions -//===----------------------------------------------------------------------===// - -let usesCustomInserter = 1 in { - -// 32-bit store -def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1, - (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - "STORE_RAW $rw_gpr, $index_gpr, $eop", - [(global_store i32:$rw_gpr, i32:$index_gpr)] ->; - -// 64-bit store -def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3, - (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - "STORE_RAW $rw_gpr.XY, $index_gpr, $eop", - [(global_store v2i32:$rw_gpr, i32:$index_gpr)] ->; - -//128-bit store -def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf, - (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), - "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop", - [(global_store v4i32:$rw_gpr, i32:$index_gpr)] ->; - -} // End usesCustomInserter = 1 - -class VTX_READ_eg buffer_id, dag outs, list pattern> - : VTX_WORD0_eg, VTX_READ { - - // Static fields - let VC_INST = 0; - let FETCH_TYPE = 2; - let FETCH_WHOLE_QUAD = 0; - let BUFFER_ID = buffer_id; - let SRC_REL = 0; - // XXX: We can infer this field based on the SRC_GPR. This would allow us - // to store vertex addresses in any channel, not just X. - let SRC_SEL_X = 0; - - let Inst{31-0} = Word0; -} - -class VTX_READ_8_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id, - (outs R600_TReg32_X:$dst_gpr), pattern> { - - let MEGA_FETCH_COUNT = 1; - let DST_SEL_X = 0; - let DST_SEL_Y = 7; // Masked - let DST_SEL_Z = 7; // Masked - let DST_SEL_W = 7; // Masked - let DATA_FORMAT = 1; // FMT_8 -} - -class VTX_READ_16_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id, - (outs R600_TReg32_X:$dst_gpr), pattern> { - let MEGA_FETCH_COUNT = 2; - let DST_SEL_X = 0; - let DST_SEL_Y = 7; // Masked - let DST_SEL_Z = 7; // Masked - let DST_SEL_W = 7; // Masked - let DATA_FORMAT = 5; // FMT_16 - -} - -class VTX_READ_32_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id, - (outs R600_TReg32_X:$dst_gpr), pattern> { - - let MEGA_FETCH_COUNT = 4; - let DST_SEL_X = 0; - let DST_SEL_Y = 7; // Masked - let DST_SEL_Z = 7; // Masked - let DST_SEL_W = 7; // Masked - let DATA_FORMAT = 0xD; // COLOR_32 - - // This is not really necessary, but there were some GPU hangs that appeared - // to be caused by ALU instructions in the next instruction group that wrote - // to the $src_gpr registers of the VTX_READ. - // e.g. - // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24 - // %T2_X = MOV %ZERO - //Adding this constraint prevents this from happening. - let Constraints = "$src_gpr.ptr = $dst_gpr"; -} - -class VTX_READ_64_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id, - (outs R600_Reg64:$dst_gpr), pattern> { - - let MEGA_FETCH_COUNT = 8; - let DST_SEL_X = 0; - let DST_SEL_Y = 1; - let DST_SEL_Z = 7; - let DST_SEL_W = 7; - let DATA_FORMAT = 0x1D; // COLOR_32_32 -} - -class VTX_READ_128_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id, - (outs R600_Reg128:$dst_gpr), pattern> { - - let MEGA_FETCH_COUNT = 16; - let DST_SEL_X = 0; - let DST_SEL_Y = 1; - let DST_SEL_Z = 2; - let DST_SEL_W = 3; - let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 - - // XXX: Need to force VTX_READ_128 instructions to write to the same register - // that holds its buffer address to avoid potential hangs. We can't use - // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst - // registers are different sizes. -} - -//===----------------------------------------------------------------------===// -// VTX Read from parameter memory space -//===----------------------------------------------------------------------===// - -def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, - [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))] ->; - -def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, - [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))] ->; - -def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, - [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] ->; - -def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <0, - [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] ->; - -def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, - [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] ->; - -//===----------------------------------------------------------------------===// -// VTX Read from global memory space -//===----------------------------------------------------------------------===// - -// 8-bit reads -def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, - [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))] ->; - -def VTX_READ_GLOBAL_16_eg : VTX_READ_16_eg <1, - [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))] ->; - -// 32-bit reads -def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, - [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] ->; - -// 64-bit reads -def VTX_READ_GLOBAL_64_eg : VTX_READ_64_eg <1, - [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] ->; - -// 128-bit reads -def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, - [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] ->; - -} // End Predicates = [isEG] - -//===----------------------------------------------------------------------===// -// Evergreen / Cayman Instructions -//===----------------------------------------------------------------------===// - -let Predicates = [isEGorCayman] in { - - // BFE_UINT - bit_extract, an optimization for mask and shift - // Src0 = Input - // Src1 = Offset - // Src2 = Width - // - // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width) - // - // Example Usage: - // (Offset, Width) - // - // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 - // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 - // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 - // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 - def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", - [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1, - i32:$src2))], - VecALU - >; - def : BFEPattern ; - - def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>; - defm : BFIPatterns ; - - def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24", - [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU - >; - def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>; - def : ROTRPattern ; - - def MULADD_eg : MULADD_Common<0x14>; - def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>; - def ASHR_eg : ASHR_Common<0x15>; - def LSHR_eg : LSHR_Common<0x16>; - def LSHL_eg : LSHL_Common<0x17>; - def CNDE_eg : CNDE_Common<0x19>; - def CNDGT_eg : CNDGT_Common<0x1A>; - def CNDGE_eg : CNDGE_Common<0x1B>; - def MUL_LIT_eg : MUL_LIT_Common<0x1F>; - def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; - def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24", - [(set i32:$dst, (mul U24:$src0, U24:$src1))], VecALU - >; - def DOT4_eg : DOT4_Common<0xBE>; - defm CUBE_eg : CUBE_Common<0xC0>; - -let hasSideEffects = 1 in { - def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", []>; -} - - def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common; - - def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { - let Pattern = []; - let Itinerary = AnyALU; - } - - def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; - - def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { - let Pattern = []; - } - - def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; - -def GROUP_BARRIER : InstR600 < - (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>, - R600ALU_Word0, - R600ALU_Word1_OP2 <0x54> { - - let dst = 0; - let dst_rel = 0; - let src0 = 0; - let src0_rel = 0; - let src0_neg = 0; - let src0_abs = 0; - let src1 = 0; - let src1_rel = 0; - let src1_neg = 0; - let src1_abs = 0; - let write = 0; - let omod = 0; - let clamp = 0; - let last = 1; - let bank_swizzle = 0; - let pred_sel = 0; - let update_exec_mask = 0; - let update_pred = 0; - - let Inst{31-0} = Word0; - let Inst{63-32} = Word1; - - let ALUInst = 1; -} - -//===----------------------------------------------------------------------===// -// LDS Instructions -//===----------------------------------------------------------------------===// -class R600_LDS op, dag outs, dag ins, string asm, - list pattern = []> : - - InstR600 , - R600_ALU_LDS_Word0, - R600LDS_Word1 { - - bits<6> offset = 0; - let lds_op = op; - - let Word1{27} = offset{0}; - let Word1{12} = offset{1}; - let Word1{28} = offset{2}; - let Word1{31} = offset{3}; - let Word0{12} = offset{4}; - let Word0{25} = offset{5}; - - - let Inst{31-0} = Word0; - let Inst{63-32} = Word1; - - let ALUInst = 1; - let HasNativeOperands = 1; - let UseNamedOperandTable = 1; -} - -class R600_LDS_1A lds_op, string name, list pattern> : R600_LDS < - lds_op, - (outs R600_Reg32:$dst), - (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, - LAST:$last, R600_Pred:$pred_sel, - BANK_SWIZZLE:$bank_swizzle), - " "#name#" $last OQAP, $src0$src0_rel $pred_sel", - pattern - > { - - let src1 = 0; - let src1_rel = 0; - let src2 = 0; - let src2_rel = 0; - - let Defs = [OQAP]; - let usesCustomInserter = 1; - let LDS_1A = 1; - let DisableEncoding = "$dst"; -} - -class R600_LDS_1A1D lds_op, dag outs, string name, list pattern, - string dst =""> : - R600_LDS < - lds_op, outs, - (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, - R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel, - LAST:$last, R600_Pred:$pred_sel, - BANK_SWIZZLE:$bank_swizzle), - " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel", - pattern - > { - - field string BaseOp; - - let src2 = 0; - let src2_rel = 0; - let LDS_1A1D = 1; -} - -class R600_LDS_1A1D_NORET lds_op, string name, list pattern> : - R600_LDS_1A1D { - let BaseOp = name; -} - -class R600_LDS_1A1D_RET lds_op, string name, list pattern> : - R600_LDS_1A1D { - - let BaseOp = name; - let usesCustomInserter = 1; - let DisableEncoding = "$dst"; - let Defs = [OQAP]; -} - -class R600_LDS_1A2D lds_op, string name, list pattern> : - R600_LDS < - lds_op, - (outs), - (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel, - R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel, - R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel, - LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle), - " "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel", - pattern> { - let LDS_1A2D = 1; -} - -def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >; -def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >; -def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE", - [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)] ->; -def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE", - [(truncstorei8_local i32:$src1, i32:$src0)] ->; -def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE", - [(truncstorei16_local i32:$src1, i32:$src0)] ->; -def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD", - [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))] ->; -def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB", - [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))] ->; -def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", - [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))] ->; -def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET", - [(set i32:$dst, (sextloadi8_local i32:$src0))] ->; -def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET", - [(set i32:$dst, (az_extloadi8_local i32:$src0))] ->; -def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET", - [(set i32:$dst, (sextloadi16_local i32:$src0))] ->; -def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET", - [(set i32:$dst, (az_extloadi16_local i32:$src0))] ->; - - // TRUNC is used for the FLT_TO_INT instructions to work around a - // perceived problem where the rounding modes are applied differently - // depending on the instruction and the slot they are in. - // See: - // https://bugs.freedesktop.org/show_bug.cgi?id=50232 - // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c - // - // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes, - // which do not need to be truncated since the fp values are 0.0f or 1.0f. - // We should look into handling these cases separately. - def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>; - - def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>; - - // SHA-256 Patterns - def : SHA256MaPattern ; - - def EG_ExportSwz : ExportSwzInst { - let Word1{19-16} = 0; // BURST_COUNT - let Word1{20} = 1; // VALID_PIXEL_MODE - let Word1{21} = eop; - let Word1{29-22} = inst; - let Word1{30} = 0; // MARK - let Word1{31} = 1; // BARRIER - } - defm : ExportPattern; - - def EG_ExportBuf : ExportBufInst { - let Word1{19-16} = 0; // BURST_COUNT - let Word1{20} = 1; // VALID_PIXEL_MODE - let Word1{21} = eop; - let Word1{29-22} = inst; - let Word1{30} = 0; // MARK - let Word1{31} = 1; // BARRIER - } - defm : SteamOutputExportPattern; - - def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT), - "TEX $COUNT @$ADDR"> { - let POP_COUNT = 0; - } - def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT), - "VTX $COUNT @$ADDR"> { - let POP_COUNT = 0; - } - def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR), - "LOOP_START_DX10 @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; - } - def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; - } - def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR), - "LOOP_BREAK @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; - } - def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR), - "CONTINUE @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; - } - def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), - "JUMP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; - } - def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), - "ELSE @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; - } - def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> { - let ADDR = 0; - let COUNT = 0; - let POP_COUNT = 0; - } - def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), - "POP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; - } - def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> { - let COUNT = 0; - let POP_COUNT = 0; - let ADDR = 0; - let END_OF_PROGRAM = 1; - } - -} // End Predicates = [isEGorCayman] //===----------------------------------------------------------------------===// // Regist loads and stores - for indirect addressing @@ -1802,217 +1277,6 @@ def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET", defm R600_ : RegisterLoadStore ; -//===----------------------------------------------------------------------===// -// Cayman Instructions -//===----------------------------------------------------------------------===// - -let Predicates = [isCayman] in { - -def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24", - [(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))], VecALU ->; -def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24", - [(set i32:$dst, (mul I24:$src0, I24:$src1))], VecALU ->; - -let isVector = 1 in { - -def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; - -def MULLO_INT_cm : MULLO_INT_Common<0x8F>; -def MULHI_INT_cm : MULHI_INT_Common<0x90>; -def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; -def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; -def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; -def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; -def LOG_IEEE_cm : LOG_IEEE_Common<0x83>; -def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; -def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; -def SIN_cm : SIN_Common<0x8D>; -def COS_cm : COS_Common<0x8E>; -} // End isVector = 1 - -def : POW_Common ; - -defm DIV_cm : DIV_Common; - -// RECIP_UINT emulation for Cayman -// The multiplication scales from [0,1] to the unsigned integer range -def : Pat < - (AMDGPUurecip i32:$src0), - (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), - (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) ->; - - def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { - let ADDR = 0; - let POP_COUNT = 0; - let COUNT = 0; - } - -def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; - -class RAT_STORE_DWORD mask> : - CF_MEM_RAT_CACHELESS <0x14, 0, mask, - (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr), - "STORE_DWORD $rw_gpr, $index_gpr", - [(global_store vt:$rw_gpr, i32:$index_gpr)]> { - let eop = 0; // This bit is not used on Cayman. -} - -def RAT_STORE_DWORD32 : RAT_STORE_DWORD ; -def RAT_STORE_DWORD64 : RAT_STORE_DWORD ; -def RAT_STORE_DWORD128 : RAT_STORE_DWORD ; - -class VTX_READ_cm buffer_id, dag outs, list pattern> - : VTX_WORD0_cm, VTX_READ { - - // Static fields - let VC_INST = 0; - let FETCH_TYPE = 2; - let FETCH_WHOLE_QUAD = 0; - let BUFFER_ID = buffer_id; - let SRC_REL = 0; - // XXX: We can infer this field based on the SRC_GPR. This would allow us - // to store vertex addresses in any channel, not just X. - let SRC_SEL_X = 0; - let SRC_SEL_Y = 0; - let STRUCTURED_READ = 0; - let LDS_REQ = 0; - let COALESCED_READ = 0; - - let Inst{31-0} = Word0; -} - -class VTX_READ_8_cm buffer_id, list pattern> - : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id, - (outs R600_TReg32_X:$dst_gpr), pattern> { - - let DST_SEL_X = 0; - let DST_SEL_Y = 7; // Masked - let DST_SEL_Z = 7; // Masked - let DST_SEL_W = 7; // Masked - let DATA_FORMAT = 1; // FMT_8 -} - -class VTX_READ_16_cm buffer_id, list pattern> - : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id, - (outs R600_TReg32_X:$dst_gpr), pattern> { - let DST_SEL_X = 0; - let DST_SEL_Y = 7; // Masked - let DST_SEL_Z = 7; // Masked - let DST_SEL_W = 7; // Masked - let DATA_FORMAT = 5; // FMT_16 - -} - -class VTX_READ_32_cm buffer_id, list pattern> - : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id, - (outs R600_TReg32_X:$dst_gpr), pattern> { - - let DST_SEL_X = 0; - let DST_SEL_Y = 7; // Masked - let DST_SEL_Z = 7; // Masked - let DST_SEL_W = 7; // Masked - let DATA_FORMAT = 0xD; // COLOR_32 - - // This is not really necessary, but there were some GPU hangs that appeared - // to be caused by ALU instructions in the next instruction group that wrote - // to the $src_gpr registers of the VTX_READ. - // e.g. - // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24 - // %T2_X = MOV %ZERO - //Adding this constraint prevents this from happening. - let Constraints = "$src_gpr.ptr = $dst_gpr"; -} - -class VTX_READ_64_cm buffer_id, list pattern> - : VTX_READ_cm <"VTX_READ_64 $dst_gpr, $src_gpr", buffer_id, - (outs R600_Reg64:$dst_gpr), pattern> { - - let DST_SEL_X = 0; - let DST_SEL_Y = 1; - let DST_SEL_Z = 7; - let DST_SEL_W = 7; - let DATA_FORMAT = 0x1D; // COLOR_32_32 -} - -class VTX_READ_128_cm buffer_id, list pattern> - : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id, - (outs R600_Reg128:$dst_gpr), pattern> { - - let DST_SEL_X = 0; - let DST_SEL_Y = 1; - let DST_SEL_Z = 2; - let DST_SEL_W = 3; - let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 - - // XXX: Need to force VTX_READ_128 instructions to write to the same register - // that holds its buffer address to avoid potential hangs. We can't use - // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst - // registers are different sizes. -} - -//===----------------------------------------------------------------------===// -// VTX Read from parameter memory space -//===----------------------------------------------------------------------===// -def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0, - [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))] ->; - -def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0, - [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))] ->; - -def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0, - [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] ->; - -def VTX_READ_PARAM_64_cm : VTX_READ_64_cm <0, - [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] ->; - -def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0, - [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] ->; - -//===----------------------------------------------------------------------===// -// VTX Read from global memory space -//===----------------------------------------------------------------------===// - -// 8-bit reads -def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1, - [(set i32:$dst_gpr, (az_extloadi8_global ADDRVTX_READ:$src_gpr))] ->; - -def VTX_READ_GLOBAL_16_cm : VTX_READ_16_cm <1, - [(set i32:$dst_gpr, (az_extloadi16_global ADDRVTX_READ:$src_gpr))] ->; - -// 32-bit reads -def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1, - [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] ->; - -// 64-bit reads -def VTX_READ_GLOBAL_64_cm : VTX_READ_64_cm <1, - [(set v2i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] ->; - -// 128-bit reads -def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1, - [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] ->; - -} // End isCayman - -//===----------------------------------------------------------------------===// -// Branch Instructions -//===----------------------------------------------------------------------===// - - -def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src), - "IF_PREDICATE_SET $src", []>; //===----------------------------------------------------------------------===// // Pseudo instructions @@ -2087,19 +1351,6 @@ def TXD_SHADOW: InstR600 < } // End isPseudo = 1 } // End usesCustomInserter = 1 -def CLAMP_R600 : CLAMP ; -def FABS_R600 : FABS; -def FNEG_R600 : FNEG; - -//===---------------------------------------------------------------------===// -// Return instruction -//===---------------------------------------------------------------------===// -let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, - usesCustomInserter = 1 in { - def RETURN : ILFormat<(outs), (ins variable_ops), - "RETURN", [(IL_retflag)]>; -} - //===----------------------------------------------------------------------===// // Constant Buffer Addressing Support @@ -2226,14 +1477,55 @@ let Inst{63-32} = Word1; let VTXInst = 1; } +//===---------------------------------------------------------------------===// +// Flow and Program control Instructions +//===---------------------------------------------------------------------===// +class ILFormat pattern> +: Instruction { + + let Namespace = "AMDGPU"; + dag OutOperandList = outs; + dag InOperandList = ins; + let Pattern = pattern; + let AsmString = !strconcat(asmstr, "\n"); + let isPseudo = 1; + let Itinerary = NullALU; + bit hasIEEEFlag = 0; + bit hasZeroOpFlag = 0; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +multiclass BranchConditional { + def _i32 : ILFormat<(outs), + (ins brtarget:$target, rci:$src0), + "; i32 Pseudo branch instruction", + [(Op bb:$target, (i32 rci:$src0))]>; + def _f32 : ILFormat<(outs), + (ins brtarget:$target, rcf:$src0), + "; f32 Pseudo branch instruction", + [(Op bb:$target, (f32 rcf:$src0))]>; +} + +// Only scalar types should generate flow control +multiclass BranchInstr { + def _i32 : ILFormat<(outs), (ins R600_Reg32:$src), + !strconcat(name, " $src"), []>; + def _f32 : ILFormat<(outs), (ins R600_Reg32:$src), + !strconcat(name, " $src"), []>; +} +// Only scalar types should generate flow control +multiclass BranchInstr2 { + def _i32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1), + !strconcat(name, " $src0, $src1"), []>; + def _f32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1), + !strconcat(name, " $src0, $src1"), []>; +} - -//===--------------------------------------------------------------------===// -// Instructions support -//===--------------------------------------------------------------------===// //===---------------------------------------------------------------------===// // Custom Inserter for Branches and returns, this eventually will be a -// seperate pass +// separate pass //===---------------------------------------------------------------------===// let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { def BRANCH : ILFormat<(outs), (ins brtarget:$target), @@ -2243,13 +1535,22 @@ let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { } //===---------------------------------------------------------------------===// -// Flow and Program control Instructions +// Return instruction //===---------------------------------------------------------------------===// +let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, + usesCustomInserter = 1 in { + def RETURN : ILFormat<(outs), (ins variable_ops), + "RETURN", [(IL_retflag)]>; +} + +//===----------------------------------------------------------------------===// +// Branch Instructions +//===----------------------------------------------------------------------===// + +def IF_PREDICATE_SET : ILFormat<(outs), (ins R600_Reg32:$src), + "IF_PREDICATE_SET $src", []>; + let isTerminator=1 in { - def SWITCH : ILFormat< (outs), (ins GPRI32:$src), - !strconcat("SWITCH", " $src"), []>; - def CASE : ILFormat< (outs), (ins GPRI32:$src), - !strconcat("CASE", " $src"), []>; def BREAK : ILFormat< (outs), (ins), "BREAK", []>; def CONTINUE : ILFormat< (outs), (ins), @@ -2293,6 +1594,60 @@ let isTerminator=1 in { defm CONTINUEC : BranchInstr2<"CONTINUEC">; } +//===----------------------------------------------------------------------===// +// Indirect addressing pseudo instructions +//===----------------------------------------------------------------------===// + +let isPseudo = 1 in { + +class ExtractVertical : InstR600 < + (outs R600_Reg32:$dst), + (ins vec_rc:$vec, R600_Reg32:$index), "", + [], + AnyALU +>; + +let Constraints = "$dst = $vec" in { + +class InsertVertical : InstR600 < + (outs vec_rc:$dst), + (ins vec_rc:$vec, R600_Reg32:$value, R600_Reg32:$index), "", + [], + AnyALU +>; + +} // End Constraints = "$dst = $vec" + +} // End isPseudo = 1 + +def R600_EXTRACT_ELT_V2 : ExtractVertical ; +def R600_EXTRACT_ELT_V4 : ExtractVertical ; + +def R600_INSERT_ELT_V2 : InsertVertical ; +def R600_INSERT_ELT_V4 : InsertVertical ; + +class ExtractVerticalPat : Pat < + (scalar_ty (extractelt vec_ty:$vec, i32:$index)), + (inst $vec, $index) +>; + +def : ExtractVerticalPat ; +def : ExtractVerticalPat ; +def : ExtractVerticalPat ; +def : ExtractVerticalPat ; + +class InsertVerticalPat : Pat < + (vec_ty (insertelt vec_ty:$vec, scalar_ty:$value, i32:$index)), + (inst $vec, $value, $index) +>; + +def : InsertVerticalPat ; +def : InsertVerticalPat ; +def : InsertVerticalPat ; +def : InsertVerticalPat ; + //===----------------------------------------------------------------------===// // ISel Patterns //===----------------------------------------------------------------------===// @@ -2310,7 +1665,7 @@ def : CND_INT_f32 ; //CNDGE_INT extra pattern def : Pat < - (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT), + (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT), (CNDGE_INT $src0, $src1, $src2) >; @@ -2345,9 +1700,6 @@ def : Insert_Element ; def : Insert_Element ; def : Insert_Element ; -def : Vector4_Build ; -def : Vector4_Build ; - def : Extract_Element ; def : Extract_Element ; @@ -2374,6 +1726,12 @@ def : DwordAddrPat ; } // End isR600toCayman Predicate +let Predicates = [isR600] in { +// Intrinsic patterns +defm : Expand24IBitOps; +defm : Expand24UBitOps; +} // End isR600 + def getLDSNoRetOp : InstrMapping { let FilterClass = "R600_LDS_1A1D"; let RowFields = ["BaseOp"];