From 63c550368d0999cd1f4d68e9d26d244eca9d5068 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 6 Oct 2015 15:57:53 +0000 Subject: [PATCH] AMDGPU/SI: Add 64-bit versions of v_nop and v_clrexcp Summary: The assembly printing of these is still missing the encoding size suffix, but this will be fixed in a later commit. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D13436 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249424 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 21 +++++++++- lib/Target/AMDGPU/SIDefines.h | 3 +- lib/Target/AMDGPU/SIInstrFormats.td | 5 +++ lib/Target/AMDGPU/SIInstrInfo.td | 42 ++++++++++++------- lib/Target/AMDGPU/SIInstructions.td | 10 ++--- test/MC/AMDGPU/vop1.s | 19 +++++++++ test/MC/AMDGPU/vop3-vop1-nosrc.s | 14 +++++++ test/MC/AMDGPU/vop3.s | 17 ++++++++ 8 files changed, 106 insertions(+), 25 deletions(-) create mode 100644 test/MC/AMDGPU/vop3-vop1-nosrc.s diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index c74d16d4386..ad50003213d 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -347,6 +347,11 @@ private: bool ParseSectionDirectiveHSAText(); public: +public: + enum AMDGPUMatchResultTy { + Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY + }; + AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) @@ -556,6 +561,11 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3))) return Match_InvalidOperand; + if ((TSFlags & SIInstrFlags::VOP3) && + (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && + getForcedEncodingSize() != 64) + return Match_PreferE32; + return Match_Success; } @@ -614,6 +624,9 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } return Error(ErrorLoc, "invalid operand for instruction"); } + case Match_PreferE32: + return Error(IDLoc, "internal error: instruction without _e64 suffix " + "should be encoded as e32"); } llvm_unreachable("Implement any new match types added!"); } @@ -1701,8 +1714,12 @@ AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) { } void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { - ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); - unsigned i = 2; + + unsigned i = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + if (Desc.getNumDefs() > 0) { + ((AMDGPUOperand &)*Operands[i++]).addRegOperands(Inst, 1); + } std::map OptionalIdx; diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h index 4c3263911c4..7f79dd34f3b 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -37,7 +37,8 @@ enum { MIMG = 1 << 18, FLAT = 1 << 19, WQM = 1 << 20, - VGPRSpill = 1 << 21 + VGPRSpill = 1 << 21, + VOPAsmPrefer32Bit = 1 << 22 }; } diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td index b16185f11a3..0e883f64caa 100644 --- a/lib/Target/AMDGPU/SIInstrFormats.td +++ b/lib/Target/AMDGPU/SIInstrFormats.td @@ -41,6 +41,10 @@ class InstSI pattern> : field bits<1> WQM = 0; field bits<1> VGPRSpill = 0; + // This bit tells the assembler to use the 32-bit encoding in case it + // is unable to infer the encoding from the operands. + field bits<1> VOPAsmPrefer32Bit = 0; + // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = VM_CNT; let TSFlags{1} = EXP_CNT; @@ -68,6 +72,7 @@ class InstSI pattern> : let TSFlags{19} = FLAT; let TSFlags{20} = WQM; let TSFlags{21} = VGPRSpill; + let TSFlags{22} = VOPAsmPrefer32Bit; let SchedRW = [Write32Bit]; } diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index d309109420f..ce1d081f437 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -989,11 +989,12 @@ def InputModsNoDefault : Operand { let ParserMatchClass = InputModsMatchClass; } -class getNumSrcArgs { +class getNumSrcArgs { int ret = - !if (!eq(Src1.Value, untyped.Value), 1, // VOP1 + !if (!eq(Src0.Value, untyped.Value), 0, + !if (!eq(Src1.Value, untyped.Value), 1, // VOP1 !if (!eq(Src2.Value, untyped.Value), 2, // VOP2 - 3)); // VOP3 + 3))); // VOP3 } // Returns the register class to use for the destination of VOP[123C] @@ -1085,17 +1086,20 @@ class getIns64 { +class getAsm32 { + string dst = "$dst"; + string src0 = ", $src0"; string src1 = ", $src1"; string src2 = ", $src2"; - string ret = "$dst, $src0"# - !if(!eq(NumSrcArgs, 1), "", src1)# - !if(!eq(NumSrcArgs, 3), src2, ""); + string ret = !if(HasDst, dst, "") # + !if(!eq(NumSrcArgs, 1), src0, "") # + !if(!eq(NumSrcArgs, 2), src0#src1, "") # + !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); } // Returns the assembly string for the inputs and outputs of a VOP3 // instruction. -class getAsm64 { +class getAsm64 { string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); string src1 = !if(!eq(NumSrcArgs, 1), "", !if(!eq(NumSrcArgs, 2), " $src1_modifiers", @@ -1103,7 +1107,7 @@ class getAsm64 { string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); string ret = !if(!eq(HasModifiers, 0), - getAsm32.ret, + getAsm32.ret, "$dst, "#src0#src1#src2#"$clamp"#"$omod"); } @@ -1122,11 +1126,12 @@ class VOPProfile _ArgVT> { field RegisterOperand Src1RC64 = getVOP3SrcForVT.ret; field RegisterOperand Src2RC64 = getVOP3SrcForVT.ret; - field bit HasDst32 = !if(!eq(DstVT, untyped), 0, 1); - field int NumSrcArgs = getNumSrcArgs.ret; + field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); + field bit HasDst32 = HasDst; + field int NumSrcArgs = getNumSrcArgs.ret; field bit HasModifiers = hasModifiers.ret; - field dag Outs = (outs DstRC:$dst); + field dag Outs = !if(HasDst,(outs DstRC:$dst),(outs)); // VOP3b instructions are a special case with a second explicit // output. This is manually overridden for them. @@ -1137,8 +1142,8 @@ class VOPProfile _ArgVT> { field dag Ins64 = getIns64.ret; - field string Asm32 = getAsm32.ret; - field string Asm64 = getAsm64.ret; + field string Asm32 = getAsm32.ret; + field string Asm64 = getAsm64.ret; } // FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order @@ -1151,6 +1156,8 @@ def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i32, untyped]>; def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>; +def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>; + def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>; def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>; def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>; @@ -1246,8 +1253,8 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> { let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); let Ins64 = getIns64, 3, HasModifiers>.ret; - let Asm32 = getAsm32<2>.ret; - let Asm64 = getAsm64<2, HasModifiers>.ret; + let Asm32 = getAsm32<1, 2>.ret; + let Asm64 = getAsm64<1, 2, HasModifiers>.ret; } def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; @@ -1424,6 +1431,9 @@ class VOP3_Pseudo pattern, string opName> : MnemonicAlias { let isPseudo = 1; let isCodeGenOnly = 1; + + field bit vdst; + field bit src0; } class VOP3_Real_si op, dag outs, dag ins, string asm, string opName> : diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 31768b75597..ec78ab5561f 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1156,8 +1156,8 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o" // VOP1 Instructions //===----------------------------------------------------------------------===// -let vdst = 0, src0 = 0 in { -defm V_NOP : VOP1_m , (outs), (ins), "v_nop", [], "v_nop">; +let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { +defm V_NOP : VOP1Inst , "v_nop", VOP_NONE>; } let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in { @@ -1332,10 +1332,8 @@ defm V_FREXP_EXP_I32_F32 : VOP1Inst , "v_frexp_exp_i32_f32", defm V_FREXP_MANT_F32 : VOP1Inst , "v_frexp_mant_f32", VOP_F32_F32 >; -let vdst = 0, src0 = 0 in { -defm V_CLREXCP : VOP1_m , (outs), (ins), "v_clrexcp", [], - "v_clrexcp" ->; +let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { +defm V_CLREXCP : VOP1Inst , "v_clrexcp", VOP_NONE>; } defm V_MOVRELD_B32 : VOP1Inst , "v_movreld_b32", VOP_I32_I32>; defm V_MOVRELS_B32 : VOP1Inst , "v_movrels_b32", VOP_I32_I32>; diff --git a/test/MC/AMDGPU/vop1.s b/test/MC/AMDGPU/vop1.s index d0b00fcd189..22a4f91afef 100644 --- a/test/MC/AMDGPU/vop1.s +++ b/test/MC/AMDGPU/vop1.s @@ -8,6 +8,25 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI +// Force 32-bit encoding + +// GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e] +v_mov_b32_e32 v1, v2 + +// Force 32-bit encoding for special instructions +// FIXME: We should be printing _e32 suffixes for these: + +// GCN: v_nop ; encoding: [0x00,0x00,0x00,0x7e] +v_nop_e32 + +// SICI: v_clrexcp ; encoding: [0x00,0x82,0x00,0x7e] +// VI: v_clrexcp ; encoding: [0x00,0x6a,0x00,0x7e] +v_clrexcp_e32 + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + // GCN: v_nop ; encoding: [0x00,0x00,0x00,0x7e] v_nop diff --git a/test/MC/AMDGPU/vop3-vop1-nosrc.s b/test/MC/AMDGPU/vop3-vop1-nosrc.s new file mode 100644 index 00000000000..ce1a1a7f338 --- /dev/null +++ b/test/MC/AMDGPU/vop3-vop1-nosrc.s @@ -0,0 +1,14 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI +// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI +// XFAIL: * + +// FIXME: We should be printing _e64 suffixes for these. +// FIXME: When this is fixed delete this file and fix test case in vop3.s + +v_nop_e64 +// SICI: v_nop_e64 ; encoding: [0x00,0x00,0x00,0xd3,0x00,0x00,0x00,0x00] +// VI: v_nop_e64 ; encoding: [0x00,0x00,0x40,0xd1,0x00,0x00,0x00,0x00] + +v_clrexcp_e64 +// SICI: v_clrexcp_e64 ; encoding: [0x00,0x00,0x82,0xd3,0x00,0x00,0x00,0x00] +// VI: v_clrexcp_e64 ; encoding: [0x00,0x00,0x75,0xd1,0x00,0x00,0x00,0x00] diff --git a/test/MC/AMDGPU/vop3.s b/test/MC/AMDGPU/vop3.s index 8dfdf5009b3..712b18e37aa 100644 --- a/test/MC/AMDGPU/vop3.s +++ b/test/MC/AMDGPU/vop3.s @@ -118,6 +118,23 @@ v_cmp_f_i64 s[2:3], v[4:5], v[6:7] // VOP1 Instructions //===----------------------------------------------------------------------===// +// Test forced e64 encoding with e32 operands + +v_mov_b32_e64 v1, v2 +// SICI: v_mov_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x02,0xd3,0x02,0x01,0x00,0x00] +// VI: v_mov_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x41,0xd1,0x02,0x01,0x00,0x00] + +// Force e64 encoding for special instructions. +// FIXME, we should be printing the _e64 suffix for v_nop and v_clrexcp. + +v_nop_e64 +// SICI: v_nop ; encoding: [0x00,0x00,0x00,0xd3,0x00,0x00,0x00,0x00] +// VI: v_nop ; encoding: [0x00,0x00,0x40,0xd1,0x00,0x00,0x00,0x00] + +v_clrexcp_e64 +// SICI: v_clrexcp ; encoding: [0x00,0x00,0x82,0xd3,0x00,0x00,0x00,0x00] +// VI: v_clrexcp ; encoding: [0x00,0x00,0x75,0xd1,0x00,0x00,0x00,0x00] + // // Modifier tests: // -- 2.34.1