From: Matt Arsenault Date: Tue, 8 Sep 2015 21:15:00 +0000 (+0000) Subject: AMDGPU/SI: Fix input vcc operand for VOP2b instructions X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=fa7378ca6e0964afaa6317ec6cf8f4501b9cf2f1;p=oota-llvm.git AMDGPU/SI: Fix input vcc operand for VOP2b instructions Adds vcc to output string input for e32. Allows option of using e64 encoding with assembler. Also fixes these instructions not implicitly reading exec. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247074 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index df6f396a403..9ce6874cad3 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -215,6 +215,10 @@ public: (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)); } + bool isSCSrc64() const { + return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm(); + } + bool isVCSrc32() const { return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); } diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 1e5e04938be..8664c050e26 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -990,7 +990,14 @@ class getVOPSrc1ForVT { // Returns the register class to use for sources of VOP3 instructions for the // given VT. class getVOP3SrcForVT { - RegisterOperand ret = !if(!eq(VT.Size, 64), VCSrc_64, VCSrc_32); + RegisterOperand ret = + !if(!eq(VT.Size, 64), + VCSrc_64, + !if(!eq(VT.Value, i1.Value), + SCSrc_64, + VCSrc_32 + ) + ); } // Returns 1 if the source arguments have modifiers, 0 if they do not. @@ -1070,7 +1077,6 @@ class getAsm64 { "$dst, "#src0#src1#src2#"$clamp"#"$omod"); } - class VOPProfile _ArgVT> { field list ArgVT = _ArgVT; @@ -1132,17 +1138,26 @@ def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>; def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; -class VOP2b_Profile : VOPProfile<[vt, vt, vt, untyped]> { +// Write out to vcc or arbitrary SGPR. +def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { let Asm32 = "$dst, vcc, $src0, $src1"; let Asm64 = "$dst, $sdst, $src0, $src1"; let Outs32 = (outs DstRC:$dst); let Outs64 = (outs DstRC:$dst, SReg_64:$sdst); } -def VOP2b_I32_I1_I32_I32 : VOP2b_Profile; - -def VOP2b_I32_I1_I32_I32_VCC : VOP2b_Profile { +// Write out to vcc or arbitrary SGPR and read in from vcc or +// arbitrary SGPR. +def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { let Src0RC32 = VCSrc_32; + let Asm32 = "$dst, vcc, $src0, $src1, vcc"; + let Asm64 = "$dst, $sdst, $src0, $src1, $src2"; + let Outs32 = (outs DstRC:$dst); + let Outs64 = (outs DstRC:$dst, SReg_64:$sdst); + + // Suppress src2 implied by type since the 32-bit encoding uses an + // implicit VCC use. + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); } // VOPC instructions are a special case because for the 32-bit @@ -1429,32 +1444,19 @@ multiclass VOP3SI_2_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { - def "" : VOP3_Pseudo , - VOP2_REV; - - def _si : VOP3b_Real_si , - VOP3DisableFields<1, 0, HasMods>; - - def _vi : VOP3b_Real_vi , - VOP3DisableFields<1, 0, HasMods>; -} - -multiclass VOP3b_3_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { +// Two operand VOP3b instruction that may have a 3rd SGPR bool operand +// instead of an implicit VCC as in the VOP2b format. +multiclass VOP3b_2_3_m pattern, string opName, string revOp, + bit HasMods = 1, bit useSGPRInput = 0, + bit UseFullOp = 0> { def "" : VOP3_Pseudo ; - def _si : VOP3b_Real_si , - VOP3DisableFields<1, 1, HasMods>; + VOP3DisableFields<1, useSGPRInput, HasMods>; def _vi : VOP3b_Real_vi , - VOP3DisableFields<1, 1, HasMods>; + VOP3DisableFields<1, useSGPRInput, HasMods>; } multiclass VOP3_C_m pat32, dag ins64, string asm64, list pat64, - string revOp, bit HasMods> { + string revOp, bit HasMods, bit useSGPRInput> { - defm _e32 : VOP2_m ; + let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { + defm _e32 : VOP2_m ; + } - defm _e64 : VOP3b_2_m ; } @@ -1596,7 +1600,7 @@ multiclass VOP2bInst ; // A VOP2 instruction that is VOP3-only on VI. @@ -1847,14 +1851,14 @@ multiclass VOP3_VCC_Inst pattern> : - VOP3b_3_m < + VOP3b_2_3_m < op, (outs vrc:$vdst, SReg_64:$sdst), (ins InputModsNoDefault:$src0_modifiers, arc:$src0, InputModsNoDefault:$src1_modifiers, arc:$src1, InputModsNoDefault:$src2_modifiers, arc:$src2, ClampMod:$clamp, omod:$omod), opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern, - opName, opName, 1, 1 + opName, opName, 1, 0, 1 >; multiclass VOP3b_64 pattern> : diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index c3835411d38..bd22e886920 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1507,7 +1507,7 @@ let isCommutable = 1 in { defm V_MADAK_F32 : VOP2MADK , "v_madak_f32">; } // End isCommutable = 1 -let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC +let isCommutable = 1 in { // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. @@ -1522,19 +1522,17 @@ defm V_SUBREV_I32 : VOP2bInst , "v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32" >; -let Uses = [VCC] in { // Carry-in comes from VCC defm V_ADDC_U32 : VOP2bInst , "v_addc_u32", - VOP2b_I32_I1_I32_I32_VCC + VOP2b_I32_I1_I32_I32_I1 >; defm V_SUBB_U32 : VOP2bInst , "v_subb_u32", - VOP2b_I32_I1_I32_I32_VCC + VOP2b_I32_I1_I32_I32_I1 >; defm V_SUBBREV_U32 : VOP2bInst , "v_subbrev_u32", - VOP2b_I32_I1_I32_I32_VCC, null_frag, "v_subb_u32" + VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32" >; -} // End Uses = [VCC] -} // End isCommutable = 1, Defs = [VCC] +} // End isCommutable = 1 defm V_READLANE_B32 : VOP2SI_3VI_m < vop3 <0x001, 0x289>, diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td index 1ab9bc4569f..608fe44f485 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/lib/Target/AMDGPU/SIRegisterInfo.td @@ -281,3 +281,13 @@ def VCSrc_64 : RegisterOperand { let OperandType = "OPERAND_REG_INLINE_C"; let ParserMatchClass = RegImmMatcher<"VCSrc64">; } + +//===----------------------------------------------------------------------===// +// SCSrc_* Operands with an SGPR or an inline constant +//===----------------------------------------------------------------------===// + +def SCSrc_64 : RegisterOperand { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"SCSrc64">; +} diff --git a/test/MC/AMDGPU/vop2-err.s b/test/MC/AMDGPU/vop2-err.s index 47d7d5bbecb..8d282f9bf7e 100644 --- a/test/MC/AMDGPU/vop2-err.s +++ b/test/MC/AMDGPU/vop2-err.s @@ -35,4 +35,28 @@ v_mul_i32_i24_e64 v1, v2, 100 v_add_i32_e32 v1, s[0:1], v2, v3 // CHECK: error: invalid operand for instruction +v_addc_u32_e32 v1, vcc, v2, v3, s[2:3] +// CHECK: error: invalid operand for instruction + +v_addc_u32_e32 v1, s[0:1], v2, v3, s[2:3] +// CHECK: error: invalid operand for instruction + +v_addc_u32_e32 v1, vcc, v2, v3, -1 +// CHECK: error: invalid operand for instruction + +v_addc_u32_e32 v1, vcc, v2, v3, 123 +// CHECK: error: invalid operand for instruction + +v_addc_u32_e32 v1, vcc, v2, v3, s0 +// CHECK: error: invalid operand for instruction + +v_addc_u32_e32 v1, -1, v2, v3, s0 +// CHECK: error: invalid operand for instruction + +v_addc_u32_e64 v1, s[0:1], v2, v3, 123 +// CHECK: error: invalid operand for instruction + +v_addc_u32 v1, s[0:1], v2, v3, 123 +// CHECK: error: invalid operand for instruction + // TODO: Constant bus restrictions diff --git a/test/MC/AMDGPU/vop2.s b/test/MC/AMDGPU/vop2.s index 0a875608464..2b8249152b7 100644 --- a/test/MC/AMDGPU/vop2.s +++ b/test/MC/AMDGPU/vop2.s @@ -307,29 +307,54 @@ v_subrev_u32 v1, vcc, v2, v3 // VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00] v_subrev_u32 v1, s[0:1], v2, v3 -// SICI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x50] -// VI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x38] -v_addc_u32 v1, vcc, v2, v3 +// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50] +// VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38] +v_addc_u32 v1, vcc, v2, v3, vcc -// SICI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00] -// VI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x02,0x00] -v_addc_u32 v1, s[0:1], v2, v3 +// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50] +// VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38] +v_addc_u32_e32 v1, vcc, v2, v3, vcc -// SICI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x52] -// VI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a] -v_subb_u32 v1, vcc, v2, v3 -// SICI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0x02,0x00] -// VI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0x02,0x00] -v_subb_u32 v1, s[0:1], v2, v3 +// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0xaa,0x01] +// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0xaa,0x01] +v_addc_u32 v1, s[0:1], v2, v3, vcc -// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x54] -// VI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c] -v_subbrev_u32 v1, vcc, v2, v3 +// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00] +// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00] +v_addc_u32 v1, s[0:1], v2, v3, s[2:3] -// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0x02,0x00] -// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0x02,0x00] -v_subbrev_u32 v1, s[0:1], v2, v3 +// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00] +// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00] +v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] + +// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x06,0x03] +// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x06,0x03] +v_addc_u32_e64 v1, s[0:1], v2, v3, -1 + +// SI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0x06,0x03] +// VI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0x06,0x03] +v_addc_u32_e64 v1, vcc, v2, v3, -1 + +// SI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0xaa,0x01] +// VI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0xaa,0x01] +v_addc_u32_e64 v1, vcc, v2, v3, vcc + +// SI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x52] +// VI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3a] +v_subb_u32 v1, vcc, v2, v3, vcc + +// SI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0xaa,0x01] +// VI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0xaa,0x01] +v_subb_u32 v1, s[0:1], v2, v3, vcc + +// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x54] +// VI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3c] +v_subbrev_u32 v1, vcc, v2, v3, vcc + +// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0xaa,0x01] +// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0xaa,0x01] +v_subbrev_u32 v1, s[0:1], v2, v3, vcc // SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56] // VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]