From: Asaf Badouh Date: Thu, 5 Nov 2015 08:45:06 +0000 (+0000) Subject: [X86][AVX512] add comi with Sae X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=1de7587fc41cb326cc5a190353309bd662f6caa9;p=oota-llvm.git [X86][AVX512] add comi with Sae add builtin_ia32_vcomisd and builtin_ia32_vcomiss Differential Revision: http://reviews.llvm.org/D14331 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252153 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 4a8c0818196..a70541625a0 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -6229,6 +6229,12 @@ let TargetPrefix = "x86" in { // Compares let TargetPrefix = "x86" in { // 512-bit + def int_x86_avx512_vcomi_sd : GCCBuiltin<"__builtin_ia32_vcomisd">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pcmpeq_b_512 : GCCBuiltin<"__builtin_ia32_pcmpeqb512_mask">, Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 685067a28c6..882cef35d6e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16404,6 +16404,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget DAG.getConstant(X86CC, dl, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + case COMI_RM: { // Comparison intrinsics with Sae + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + SDValue CC = Op.getOperand(3); + SDValue Sae = Op.getOperand(4); + auto X86CC = TranslateX86ConstCondToX86CC(CC); + unsigned comiOp = std::get<0>(X86CC) ? 
IntrData->Opc0 : IntrData->Opc1; + SDValue Cond; + if (cast(Sae)->getZExtValue() != + X86::STATIC_ROUNDING::CUR_DIRECTION) + Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae); + else + Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(std::get<1>(X86CC), dl, MVT::i8), Cond); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } case VSHIFT: return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), Op.getOperand(1), Op.getOperand(2), DAG); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 8cb6babd35f..007fedbabab 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -5445,6 +5445,29 @@ let Predicates = [HasAVX512] in { EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; } } + +// Unordered/Ordered scalar fp compare with Sae and set EFLAGS +multiclass avx512_ord_cmp_sae opc, X86VectorVTInfo _, SDNode OpNode, + string OpcodeStr> { + def rb: AVX512, EVEX, EVEX_B, VEX_LIG, EVEX_V128, + Sched<[WriteFAdd]>; +} + +let Defs = [EFLAGS], Predicates = [HasAVX512] in { + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, X86ucomiSae, "vucomiss">, + AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, X86ucomiSae, "vucomisd">, + AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, X86comiSae, "vcomiss">, + AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, X86comiSae, "vcomisd">, + AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; +} + let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, "ucomiss">, PS, EVEX, VEX_LIG, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 90710bfdfc0..eff5c0b5476 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ 
b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -38,6 +38,8 @@ def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>; def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisFP<1>, SDTCisVT<3, i8>, SDTCisVec<1>]>; +def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>, SDTCisInt<3>]>; def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; @@ -66,7 +68,9 @@ def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; +def X86comiSae : SDNode<"X86ISD::COMI", SDTX86CmpTestSae>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; +def X86ucomiSae: SDNode<"X86ISD::UCOMI", SDTX86CmpTestSae>; def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; //def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD", diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 7e7dc3a9e61..cf767917da9 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -20,7 +20,7 @@ enum IntrinsicType { INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS, FPCLASSS, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP, - CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI, + CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI, COMI_RM, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK, INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK, @@ -1625,6 +1625,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 
0), X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0), + X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI), + X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI), X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0), @@ -1806,6 +1808,60 @@ static void verifyIntrinsicTables() { "Intrinsic data tables should be sorted by Intrinsic ID"); } +/* +* Get comparison modifier from _mm_comi_round_sd/ss intrinsic +* Return tuple +*/ +static std::tuple TranslateX86ConstCondToX86CC(SDValue &imm) { + ConstantSDNode *CImm = dyn_cast(imm); + unsigned IntImm = CImm->getZExtValue(); + // On a floating point condition, the flags are set as follows: + // ZF PF CF op + // 0 | 0 | 0 | X > Y + // 0 | 0 | 1 | X < Y + // 1 | 0 | 0 | X == Y + // 1 | 1 | 1 | unordered + switch (IntImm) { + default: llvm_unreachable("Invalid floating point compare value for Comi!"); + case _CMP_EQ_OQ: // 0x00 - Equal (ordered, nonsignaling) + case _CMP_EQ_OS: // 0x10 - Equal (ordered, signaling) + return std::make_tuple(true, X86::COND_E); + case _CMP_EQ_UQ: // 0x08 - Equal (unordered, non-signaling) + case _CMP_EQ_US: // 0x18 - Equal (unordered, signaling) + return std::make_tuple(false , X86::COND_E); + case _CMP_LT_OS: // 0x01 - Less-than (ordered, signaling) + case _CMP_LT_OQ: // 0x11 - Less-than (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_B); + case _CMP_NGE_US: // 0x09 - Not-greater-than-or-equal (unordered, signaling) + case _CMP_NGE_UQ: // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling) + return std::make_tuple(false , X86::COND_B); + case _CMP_LE_OS: // 0x02 - Less-than-or-equal (ordered, signaling) + case _CMP_LE_OQ: // 0x12 - Less-than-or-equal (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_BE); + case _CMP_NGT_US: // 
0x0A - Not-greater-than (unordered, signaling) + case _CMP_NGT_UQ: // 0x1A - Not-greater-than (unordered, nonsignaling) + return std::make_tuple(false, X86::COND_BE); + case _CMP_GT_OS: // 0x0E - Greater-than (ordered, signaling) + case _CMP_GT_OQ: // 0x1E - Greater-than (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_A); + case _CMP_NLE_US: // 0x06 - Not-less-than-or-equal (unordered,signaling) + case _CMP_NLE_UQ: // 0x16 - Not-less-than-or-equal (unordered, nonsignaling) + return std::make_tuple(false, X86::COND_A); + case _CMP_GE_OS: // 0x0D - Greater-than-or-equal (ordered, signaling) + case _CMP_GE_OQ: // 0x1D - Greater-than-or-equal (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_AE); + case _CMP_NLT_US: // 0x05 - Not-less-than (unordered, signaling) + case _CMP_NLT_UQ: // 0x15 - Not-less-than (unordered, nonsignaling) + return std::make_tuple(false, X86::COND_AE); + case _CMP_NEQ_OQ: // 0x0C - Not-equal (ordered, non-signaling) + case _CMP_NEQ_OS: // 0x1C - Not-equal (ordered, signaling) + return std::make_tuple(true, X86::COND_NE); + case _CMP_NEQ_UQ: // 0x04 - Not-equal (unordered, nonsignaling) + case _CMP_NEQ_US: // 0x14 - Not-equal (unordered, signaling) + return std::make_tuple(false, X86::COND_NE); + } +} + } // End llvm namespace #endif diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 6ce1da55aa4..923ad4d069f 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -4676,3 +4676,78 @@ define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i6 ret <8 x i64> %res2 } +define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { +; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae +; CHECK: vcomisd {sae}, %xmm1, %xmm0 +; CHECK-NEXT: sete %al + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8) + ret i32 %res +} + +define i32 
@test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) { +; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae +; CHECK: vucomisd {sae}, %xmm1, %xmm0 +; CHECK-NEXT: sete %al + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8) + ret i32 %res +} + +define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) { +; CHECK-LABEL: test_x86_avx512_comi_sd_eq +; CHECK: vcomisd %xmm1, %xmm0 +; CHECK-NEXT: sete %al + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4) + ret i32 %res +} + +define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) { +; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq +; CHECK: vucomisd %xmm1, %xmm0 +; CHECK-NEXT: sete %al + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4) + ret i32 %res +} + +define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { +; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae +; CHECK: vcomisd {sae}, %xmm1, %xmm0 +; CHECK-NEXT: sbbl %eax, %eax + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8) + ret i32 %res +} + +define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) { +; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae +; CHECK: vucomisd {sae}, %xmm1, %xmm0 +; CHECK-NEXT: sbbl %eax, %eax + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8) + ret i32 %res +} + +define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) { +; CHECK-LABEL: test_x86_avx512_comi_sd_lt +; CHECK: vcomisd %xmm1, %xmm0 +; CHECK-NEXT: sbbl %eax, %eax + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4) + ret i32 %res +} + +define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) { +; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt +; CHECK: vucomisd %xmm1, %xmm0 +; CHECK-NEXT: sbbl %eax, %eax + %res = 
call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32) + +define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) { +; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt +; CHECK: vucomiss %xmm1, %xmm0 +; CHECK-NEXT: sbbl %eax, %eax + %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32) diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index dc0ee7e671b..98ec9ff297c 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -18297,3 +18297,130 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2 // CHECK: encoding: [0xc5,0xf9,0x7e,0xaa,0xfc,0xfd,0xff,0xff] vmovd %xmm5, -516(%rdx) +// CHECK: vcomisd %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0x2f,0xfd] + vcomisd %xmm21, %xmm23 + +// CHECK: vcomisd {sae}, %xmm21, %xmm23 +// CHECK: encoding: [0x62,0xa1,0xfd,0x18,0x2f,0xfd] + vcomisd {sae}, %xmm21, %xmm23 + +// CHECK: vcomisd (%rcx), %xmm23 +// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0x39] + vcomisd (%rcx), %xmm23 + +// CHECK: vcomisd 291(%rax,%r14,8), %xmm23 +// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0x2f,0xbc,0xf0,0x23,0x01,0x00,0x00] + vcomisd 291(%rax,%r14,8), %xmm23 + +// CHECK: vcomisd 1016(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0x7a,0x7f] + vcomisd 1016(%rdx), %xmm23 + +// CHECK: vcomisd 1024(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0xba,0x00,0x04,0x00,0x00] + vcomisd 1024(%rdx), %xmm23 + +// CHECK: vcomisd -1024(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0x7a,0x80] + vcomisd -1024(%rdx), %xmm23 + +// CHECK: vcomisd -1032(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0xba,0xf8,0xfb,0xff,0xff] + vcomisd -1032(%rdx), %xmm23 + +// CHECK: vcomiss %xmm28, %xmm14 +// CHECK: 
encoding: [0x62,0x11,0x7c,0x08,0x2f,0xf4] + vcomiss %xmm28, %xmm14 + +// CHECK: vcomiss {sae}, %xmm28, %xmm14 +// CHECK: encoding: [0x62,0x11,0x7c,0x18,0x2f,0xf4] + vcomiss {sae}, %xmm28, %xmm14 + +// CHECK: vcomiss (%rcx), %xmm14 +// CHECK: encoding: [0xc5,0x78,0x2f,0x31] + vcomiss (%rcx), %xmm14 + +// CHECK: vcomiss 291(%rax,%r14,8), %xmm14 +// CHECK: encoding: [0xc4,0x21,0x78,0x2f,0xb4,0xf0,0x23,0x01,0x00,0x00] + vcomiss 291(%rax,%r14,8), %xmm14 + +// CHECK: vcomiss 508(%rdx), %xmm14 +// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0xfc,0x01,0x00,0x00] + vcomiss 508(%rdx), %xmm14 + +// CHECK: vcomiss 512(%rdx), %xmm14 +// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0x00,0x02,0x00,0x00] + vcomiss 512(%rdx), %xmm14 + +// CHECK: vcomiss -512(%rdx), %xmm14 +// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0x00,0xfe,0xff,0xff] + vcomiss -512(%rdx), %xmm14 + +// CHECK: vcomiss -516(%rdx), %xmm14 +// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0xfc,0xfd,0xff,0xff] + vcomiss -516(%rdx), %xmm14 + +// CHECK: vucomisd %xmm10, %xmm11 +// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xda] + vucomisd %xmm10, %xmm11 + +// CHECK: vucomisd {sae}, %xmm10, %xmm11 +// CHECK: encoding: [0x62,0x51,0xfd,0x18,0x2e,0xda] + vucomisd {sae}, %xmm10, %xmm11 + +// CHECK: vucomisd (%rcx), %xmm11 +// CHECK: encoding: [0xc5,0x79,0x2e,0x19] + vucomisd (%rcx), %xmm11 + +// CHECK: vucomisd 291(%rax,%r14,8), %xmm11 +// CHECK: encoding: [0xc4,0x21,0x79,0x2e,0x9c,0xf0,0x23,0x01,0x00,0x00] + vucomisd 291(%rax,%r14,8), %xmm11 + +// CHECK: vucomisd 1016(%rdx), %xmm11 +// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0xf8,0x03,0x00,0x00] + vucomisd 1016(%rdx), %xmm11 + +// CHECK: vucomisd 1024(%rdx), %xmm11 +// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0x00,0x04,0x00,0x00] + vucomisd 1024(%rdx), %xmm11 + +// CHECK: vucomisd -1024(%rdx), %xmm11 +// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0x00,0xfc,0xff,0xff] + vucomisd -1024(%rdx), %xmm11 + +// CHECK: vucomisd -1032(%rdx), %xmm11 +// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0xf8,0xfb,0xff,0xff] + vucomisd 
-1032(%rdx), %xmm11 + +// CHECK: vucomiss %xmm11, %xmm22 +// CHECK: encoding: [0x62,0xc1,0x7c,0x08,0x2e,0xf3] + vucomiss %xmm11, %xmm22 + +// CHECK: vucomiss {sae}, %xmm11, %xmm22 +// CHECK: encoding: [0x62,0xc1,0x7c,0x18,0x2e,0xf3] + vucomiss {sae}, %xmm11, %xmm22 + +// CHECK: vucomiss (%rcx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0x31] + vucomiss (%rcx), %xmm22 + +// CHECK: vucomiss 291(%rax,%r14,8), %xmm22 +// CHECK: encoding: [0x62,0xa1,0x7c,0x08,0x2e,0xb4,0xf0,0x23,0x01,0x00,0x00] + vucomiss 291(%rax,%r14,8), %xmm22 + +// CHECK: vucomiss 508(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0x72,0x7f] + vucomiss 508(%rdx), %xmm22 + +// CHECK: vucomiss 512(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0xb2,0x00,0x02,0x00,0x00] + vucomiss 512(%rdx), %xmm22 + +// CHECK: vucomiss -512(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0x72,0x80] + vucomiss -512(%rdx), %xmm22 + +// CHECK: vucomiss -516(%rdx), %xmm22 +// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0xb2,0xfc,0xfd,0xff,0xff] + vucomiss -516(%rdx), %xmm22