From 9aaefc3baafefeeed53f13791edae55aac6ba04f Mon Sep 17 00:00:00 2001 From: Igor Breger Date: Mon, 2 Nov 2015 07:39:36 +0000 Subject: [PATCH] AVX512: Implemented encoding and intrinsics for VBROADCASTI32x2 and VBROADCASTF32x2 instructions. Differential Revision: http://reviews.llvm.org/D14216 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251781 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 25 +++ lib/Target/X86/X86InstrAVX512.td | 40 ++++ lib/Target/X86/X86InstrFragmentsSIMD.td | 4 + lib/Target/X86/X86IntrinsicsInfo.h | 10 + test/CodeGen/X86/avx512dq-intrinsics.ll | 40 ++++ test/CodeGen/X86/avx512dqvl-intrinsics.ll | 61 ++++++ test/MC/X86/x86-64-avx512dq.s | 145 +++++++++++++++ test/MC/X86/x86-64-avx512dq_vl.s | 217 ++++++++++++++++++++++ 8 files changed, 542 insertions(+) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 03dbb216ad9..4a8c0818196 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -4886,6 +4886,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i64_ty], [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_broadcastf32x2_256 : + GCCBuiltin<"__builtin_ia32_broadcastf32x2_256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf32x2_512 : + GCCBuiltin<"__builtin_ia32_broadcastf32x2_512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x2_128 : + GCCBuiltin<"__builtin_ia32_broadcasti32x2_128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x2_256 : + GCCBuiltin<"__builtin_ia32_broadcasti32x2_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x2_512 : + GCCBuiltin<"__builtin_ia32_broadcasti32x2_512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_i32_512 : Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 60ba687da62..64c6e8dd7e9 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1047,6 +1047,46 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8", EVEX_V512, EVEX_CD8<32, CD8VT8>; } +multiclass avx512_broadcast_32x2 opc, string OpcodeStr, + X86VectorVTInfo _Dst, X86VectorVTInfo _Src, + SDNode OpNode = X86SubVBroadcast> { + + defm r : AVX512_maskable, + T8PD, EVEX; + let mayLoad = 1 in + defm m : AVX512_maskable, + T8PD, EVEX, EVEX_CD8<_Src.EltSize, CD8VT2>; +} + +multiclass avx512_common_broadcast_32x2 opc, string OpcodeStr, + AVX512VLVectorVTInfo _> { + let Predicates = [HasDQI] in + defm Z : avx512_broadcast_32x2, + EVEX_V512; + let Predicates = [HasDQI, HasVLX] in + defm Z256 : avx512_broadcast_32x2, + EVEX_V256; +} + +multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, + AVX512VLVectorVTInfo _> : + avx512_common_broadcast_32x2 { + + let Predicates = [HasDQI, HasVLX] in + defm Z128 : avx512_broadcast_32x2, EVEX_V128; +} + +defm VPBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", + avx512vl_i32_info>; +defm VPBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", + avx512vl_f32_info>; + def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))), diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index d7c90c1780a..90710bfdfc0 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -382,6 +382,10 @@ def X86Vfpclasss : SDNode<"X86ISD::VFPCLASS", SDTypeProfile<1, 2, [SDTCisInt<0 def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSubVecOfVec<1, 0>]>, []>; +// SDTCisSubVecOfVec restriction cannot be applied for 128 bit version of VBROADCASTI32x2. +def X86SubV32x2Broadcast : SDNode<"X86ISD::SUBV_BROADCAST", + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>, []>; + def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 3c3e3316d37..7e7dc3a9e61 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -420,6 +420,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_blend_w_128, BLEND, X86ISD::SELECT, 0), X86_INTRINSIC_DATA(avx512_mask_blend_w_256, BLEND, X86ISD::SELECT, 0), X86_INTRINSIC_DATA(avx512_mask_blend_w_512, BLEND, X86ISD::SELECT, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, INTR_TYPE_1OP_MASK, + X86ISD::SUBV_BROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, INTR_TYPE_1OP_MASK, + X86ISD::SUBV_BROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_128, INTR_TYPE_1OP_MASK, + X86ISD::SUBV_BROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_256, INTR_TYPE_1OP_MASK, + X86ISD::SUBV_BROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_512, INTR_TYPE_1OP_MASK, + X86ISD::SUBV_BROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0), diff --git a/test/CodeGen/X86/avx512dq-intrinsics.ll b/test/CodeGen/X86/avx512dq-intrinsics.ll index b36f1ef52c1..c25073aaad4 100644 --- a/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -501,3 +501,43 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) { %res2 = add i8 %res, %res1 ret i8 %res2 } + +declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm1 {%k1} +; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z} +; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm0 +; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1 +; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 %x3) + %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3) + %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res3, %res2 + ret <16 x float> %res4 +} + +declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm1 {%k1} +; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z} +; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm0 +; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 +; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3) + %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1) + %res3 = add <16 x i32> %res, %res1 + %res4 = add <16 x i32> %res3, %res2 + ret <16 x i32> %res4 +} diff --git a/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/test/CodeGen/X86/avx512dqvl-intrinsics.ll index 22cfe1924b6..598348fe6c5 100644 --- a/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -1772,3 +1772,64 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) { %res2 = add i8 %res, %res1 ret i8 %res2 } + +declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask_broadcastf32x2_256(<4 x float> %x0, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm1 {%k1} +; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm0 +; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %x3) + %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 -1) + %res3 = fadd <8 x float> %res, %res1 + %res4 = fadd <8 x float> %res3, %res2 + ret <8 x float> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm1 {%k1} +; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm0 +; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %x3) + %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1) + %res3 = add <8 x i32> %res, %res1 + %res4 = add <8 x i32> %res3, %res2 + ret <8 x i32> %res4 +} + +declare <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 +; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x3) + %res2 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 -1) + %res3 = add <4 x i32> %res, %res1 + %res4 = add <4 x i32> %res3, %res2 + ret <4 x i32> %res4 +} + diff --git a/test/MC/X86/x86-64-avx512dq.s b/test/MC/X86/x86-64-avx512dq.s index 5c9f579e97c..d0b91d69ebd 100644 --- a/test/MC/X86/x86-64-avx512dq.s +++ b/test/MC/X86/x86-64-avx512dq.s @@ -3991,3 +3991,148 @@ // CHECK: vfpclassss $123, -516(%rdx), %k4 // CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xa2,0xfc,0xfd,0xff,0xff,0x7b] vfpclassss $0x7b,-516(%rdx), %k4 + +// CHECK: vbroadcasti32x2 %xmm31, %zmm30 +// CHECK: encoding: [0x62,0x02,0x7d,0x48,0x59,0xf7] + vbroadcasti32x2 %xmm31, %zmm30 + +// CHECK: vbroadcasti32x2 %xmm31, %zmm30 {%k5} +// CHECK: encoding: [0x62,0x02,0x7d,0x4d,0x59,0xf7] + vbroadcasti32x2 %xmm31, %zmm30 {%k5} + +// CHECK: vbroadcasti32x2 %xmm31, %zmm30 {%k5} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0xcd,0x59,0xf7] + vbroadcasti32x2 %xmm31, %zmm30 {%k5} {z} + +// CHECK: vbroadcasti32x2 (%rcx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x59,0x31] + vbroadcasti32x2 (%rcx), %zmm30 + +// CHECK: vbroadcasti32x2 291(%rax,%r14,8), %zmm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00] + vbroadcasti32x2 291(%rax,%r14,8), %zmm30 + +// CHECK: vbroadcasti32x2 1016(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x59,0x72,0x7f] + vbroadcasti32x2 1016(%rdx), %zmm30 + +// CHECK: vbroadcasti32x2 1024(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x59,0xb2,0x00,0x04,0x00,0x00] + vbroadcasti32x2 1024(%rdx), %zmm30 + +// CHECK: vbroadcasti32x2 -1024(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x59,0x72,0x80] + vbroadcasti32x2 -1024(%rdx), %zmm30 + +// CHECK: vbroadcasti32x2 -1032(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x59,0xb2,0xf8,0xfb,0xff,0xff] + vbroadcasti32x2 -1032(%rdx), %zmm30 + +// CHECK: vbroadcasti32x2 %xmm17, %zmm20 +// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x59,0xe1] + vbroadcasti32x2 %xmm17, %zmm20 + +// CHECK: vbroadcasti32x2 %xmm17, %zmm20 {%k1} +// CHECK: encoding: [0x62,0xa2,0x7d,0x49,0x59,0xe1] + vbroadcasti32x2 %xmm17, %zmm20 {%k1} + +// CHECK: vbroadcasti32x2 %xmm17, %zmm20 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0xc9,0x59,0xe1] + vbroadcasti32x2 %xmm17, %zmm20 {%k1} {z} + +// CHECK: vbroadcasti32x2 (%rcx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x59,0x21] + vbroadcasti32x2 (%rcx), %zmm20 + +// CHECK: vbroadcasti32x2 4660(%rax,%r14,8), %zmm20 +// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x59,0xa4,0xf0,0x34,0x12,0x00,0x00] + vbroadcasti32x2 4660(%rax,%r14,8), %zmm20 + +// CHECK: vbroadcasti32x2 1016(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x59,0x62,0x7f] + vbroadcasti32x2 1016(%rdx), %zmm20 + +// CHECK: vbroadcasti32x2 1024(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x59,0xa2,0x00,0x04,0x00,0x00] + vbroadcasti32x2 1024(%rdx), %zmm20 + +// CHECK: vbroadcasti32x2 -1024(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x59,0x62,0x80] + vbroadcasti32x2 -1024(%rdx), %zmm20 + +// CHECK: vbroadcasti32x2 -1032(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x59,0xa2,0xf8,0xfb,0xff,0xff] + vbroadcasti32x2 -1032(%rdx), %zmm20 + +// CHECK: vbroadcastf32x2 %xmm23, %zmm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x19,0xdf] + vbroadcastf32x2 %xmm23, %zmm27 + +// CHECK: vbroadcastf32x2 %xmm23, %zmm27 {%k6} +// CHECK: encoding: [0x62,0x22,0x7d,0x4e,0x19,0xdf] + vbroadcastf32x2 %xmm23, %zmm27 {%k6} + +// CHECK: vbroadcastf32x2 %xmm23, %zmm27 {%k6} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xce,0x19,0xdf] + vbroadcastf32x2 %xmm23, %zmm27 {%k6} {z} + +// CHECK: vbroadcastf32x2 (%rcx), %zmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x19] + vbroadcastf32x2 (%rcx), %zmm27 + +// CHECK: vbroadcastf32x2 291(%rax,%r14,8), %zmm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x19,0x9c,0xf0,0x23,0x01,0x00,0x00] + vbroadcastf32x2 291(%rax,%r14,8), %zmm27 + +// CHECK: vbroadcastf32x2 1016(%rdx), %zmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x7f] + vbroadcastf32x2 1016(%rdx), %zmm27 + +// CHECK: vbroadcastf32x2 1024(%rdx), %zmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0x00,0x04,0x00,0x00] + vbroadcastf32x2 1024(%rdx), %zmm27 + +// CHECK: vbroadcastf32x2 -1024(%rdx), %zmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x80] + vbroadcastf32x2 -1024(%rdx), %zmm27 + +// CHECK: vbroadcastf32x2 -1032(%rdx), %zmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0xf8,0xfb,0xff,0xff] + vbroadcastf32x2 -1032(%rdx), %zmm27 + +// CHECK: vbroadcastf32x2 %xmm21, %zmm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x19,0xdd] + vbroadcastf32x2 %xmm21, %zmm27 + +// CHECK: vbroadcastf32x2 %xmm21, %zmm27 {%k5} +// CHECK: encoding: [0x62,0x22,0x7d,0x4d,0x19,0xdd] + vbroadcastf32x2 %xmm21, %zmm27 {%k5} + +// CHECK: vbroadcastf32x2 %xmm21, %zmm27 {%k5} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xcd,0x19,0xdd] + vbroadcastf32x2 %xmm21, %zmm27 {%k5} {z} + +// CHECK: vbroadcastf32x2 (%rcx), %zmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x19] + vbroadcastf32x2 (%rcx), %zmm27 + +// CHECK: vbroadcastf32x2 4660(%rax,%r14,8), %zmm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x19,0x9c,0xf0,0x34,0x12,0x00,0x00] + vbroadcastf32x2 4660(%rax,%r14,8), %zmm27 + +// CHECK: vbroadcastf32x2 1016(%rdx), %zmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x7f] + vbroadcastf32x2 1016(%rdx), %zmm27 + +// CHECK: vbroadcastf32x2 1024(%rdx), %zmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0x00,0x04,0x00,0x00] + vbroadcastf32x2 1024(%rdx), %zmm27 + +// CHECK: vbroadcastf32x2 -1024(%rdx), %zmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x80] + vbroadcastf32x2 -1024(%rdx), %zmm27 + +// CHECK: vbroadcastf32x2 -1032(%rdx), %zmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0xf8,0xfb,0xff,0xff] + vbroadcastf32x2 -1032(%rdx), %zmm27 + diff --git a/test/MC/X86/x86-64-avx512dq_vl.s b/test/MC/X86/x86-64-avx512dq_vl.s index 54b77e02190..eef6b0cf33e 100644 --- a/test/MC/X86/x86-64-avx512dq_vl.s +++ b/test/MC/X86/x86-64-avx512dq_vl.s @@ -4607,3 +4607,220 @@ // CHECK: vfpclasspsl $123, -516(%rdx){1to8}, %k3 // CHECK: encoding: [0x62,0xf3,0x7d,0x38,0x66,0x9a,0xfc,0xfd,0xff,0xff,0x7b] vfpclasspsl $0x7b,-516(%rdx){1to8}, %k3 + +// CHECK: vbroadcasti32x2 %xmm30, %xmm30 +// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x59,0xf6] + vbroadcasti32x2 %xmm30, %xmm30 + +// CHECK: vbroadcasti32x2 %xmm30, %xmm30 {%k2} +// CHECK: encoding: [0x62,0x02,0x7d,0x0a,0x59,0xf6] + vbroadcasti32x2 %xmm30, %xmm30 {%k2} + +// CHECK: vbroadcasti32x2 %xmm30, %xmm30 {%k2} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0x8a,0x59,0xf6] + vbroadcasti32x2 %xmm30, %xmm30 {%k2} {z} + +// CHECK: vbroadcasti32x2 (%rcx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x31] + vbroadcasti32x2 (%rcx), %xmm30 + +// CHECK: vbroadcasti32x2 291(%rax,%r14,8), %xmm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00] + vbroadcasti32x2 291(%rax,%r14,8), %xmm30 + +// CHECK: vbroadcasti32x2 1016(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x7f] + vbroadcasti32x2 1016(%rdx), %xmm30 + +// CHECK: vbroadcasti32x2 1024(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0x00,0x04,0x00,0x00] + vbroadcasti32x2 1024(%rdx), %xmm30 + +// CHECK: vbroadcasti32x2 -1024(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x80] + vbroadcasti32x2 -1024(%rdx), %xmm30 + +// CHECK: vbroadcasti32x2 -1032(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0xf8,0xfb,0xff,0xff] + vbroadcasti32x2 -1032(%rdx), %xmm30 + +// CHECK: vbroadcasti32x2 %xmm28, %ymm26 +// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x59,0xd4] + vbroadcasti32x2 %xmm28, %ymm26 + +// CHECK: vbroadcasti32x2 %xmm28, %ymm26 {%k7} +// CHECK: encoding: [0x62,0x02,0x7d,0x2f,0x59,0xd4] + vbroadcasti32x2 %xmm28, %ymm26 {%k7} + +// CHECK: vbroadcasti32x2 %xmm28, %ymm26 {%k7} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0xaf,0x59,0xd4] + vbroadcasti32x2 %xmm28, %ymm26 {%k7} {z} + +// CHECK: vbroadcasti32x2 (%rcx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x59,0x11] + vbroadcasti32x2 (%rcx), %ymm26 + +// CHECK: vbroadcasti32x2 291(%rax,%r14,8), %ymm26 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x59,0x94,0xf0,0x23,0x01,0x00,0x00] + vbroadcasti32x2 291(%rax,%r14,8), %ymm26 + +// CHECK: vbroadcasti32x2 1016(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x59,0x52,0x7f] + vbroadcasti32x2 1016(%rdx), %ymm26 + +// CHECK: vbroadcasti32x2 1024(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x59,0x92,0x00,0x04,0x00,0x00] + vbroadcasti32x2 1024(%rdx), %ymm26 + +// CHECK: vbroadcasti32x2 -1024(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x59,0x52,0x80] + vbroadcasti32x2 -1024(%rdx), %ymm26 + +// CHECK: vbroadcasti32x2 -1032(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x59,0x92,0xf8,0xfb,0xff,0xff] + vbroadcasti32x2 -1032(%rdx), %ymm26 + +// CHECK: vbroadcasti32x2 %xmm28, %xmm30 +// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x59,0xf4] + vbroadcasti32x2 %xmm28, %xmm30 + +// CHECK: vbroadcasti32x2 %xmm28, %xmm30 {%k6} +// CHECK: encoding: [0x62,0x02,0x7d,0x0e,0x59,0xf4] + vbroadcasti32x2 %xmm28, %xmm30 {%k6} + +// CHECK: vbroadcasti32x2 %xmm28, %xmm30 {%k6} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0x8e,0x59,0xf4] + vbroadcasti32x2 %xmm28, %xmm30 {%k6} {z} + +// CHECK: vbroadcasti32x2 (%rcx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x31] + vbroadcasti32x2 (%rcx), %xmm30 + +// CHECK: vbroadcasti32x2 4660(%rax,%r14,8), %xmm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x59,0xb4,0xf0,0x34,0x12,0x00,0x00] + vbroadcasti32x2 4660(%rax,%r14,8), %xmm30 + +// CHECK: vbroadcasti32x2 1016(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x7f] + vbroadcasti32x2 1016(%rdx), %xmm30 + +// CHECK: vbroadcasti32x2 1024(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0x00,0x04,0x00,0x00] + vbroadcasti32x2 1024(%rdx), %xmm30 + +// CHECK: vbroadcasti32x2 -1024(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x80] + vbroadcasti32x2 -1024(%rdx), %xmm30 + +// CHECK: vbroadcasti32x2 -1032(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0xf8,0xfb,0xff,0xff] + vbroadcasti32x2 -1032(%rdx), %xmm30 + +// CHECK: vbroadcasti32x2 %xmm28, %ymm23 +// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x59,0xfc] + vbroadcasti32x2 %xmm28, %ymm23 + +// CHECK: vbroadcasti32x2 %xmm28, %ymm23 {%k1} +// CHECK: encoding: [0x62,0x82,0x7d,0x29,0x59,0xfc] + vbroadcasti32x2 %xmm28, %ymm23 {%k1} + +// CHECK: vbroadcasti32x2 %xmm28, %ymm23 {%k1} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0xa9,0x59,0xfc] + vbroadcasti32x2 %xmm28, %ymm23 {%k1} {z} + +// CHECK: vbroadcasti32x2 (%rcx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x59,0x39] + vbroadcasti32x2 (%rcx), %ymm23 + +// CHECK: vbroadcasti32x2 4660(%rax,%r14,8), %ymm23 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x59,0xbc,0xf0,0x34,0x12,0x00,0x00] + vbroadcasti32x2 4660(%rax,%r14,8), %ymm23 + +// CHECK: vbroadcasti32x2 1016(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x59,0x7a,0x7f] + vbroadcasti32x2 1016(%rdx), %ymm23 + +// CHECK: vbroadcasti32x2 1024(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x59,0xba,0x00,0x04,0x00,0x00] + vbroadcasti32x2 1024(%rdx), %ymm23 + +// CHECK: vbroadcasti32x2 -1024(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x59,0x7a,0x80] + vbroadcasti32x2 -1024(%rdx), %ymm23 + +// CHECK: vbroadcasti32x2 -1032(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x59,0xba,0xf8,0xfb,0xff,0xff] + vbroadcasti32x2 -1032(%rdx), %ymm23 + +// CHECK: vbroadcastf32x2 %xmm18, %ymm28 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x19,0xe2] + vbroadcastf32x2 %xmm18, %ymm28 + +// CHECK: vbroadcastf32x2 %xmm18, %ymm28 {%k7} +// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x19,0xe2] + vbroadcastf32x2 %xmm18, %ymm28 {%k7} + +// CHECK: vbroadcastf32x2 %xmm18, %ymm28 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x19,0xe2] + vbroadcastf32x2 %xmm18, %ymm28 {%k7} {z} + +// CHECK: vbroadcastf32x2 (%rcx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x19,0x21] + vbroadcastf32x2 (%rcx), %ymm28 + +// CHECK: vbroadcastf32x2 291(%rax,%r14,8), %ymm28 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x19,0xa4,0xf0,0x23,0x01,0x00,0x00] + vbroadcastf32x2 291(%rax,%r14,8), %ymm28 + +// CHECK: vbroadcastf32x2 1016(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x19,0x62,0x7f] + vbroadcastf32x2 1016(%rdx), %ymm28 + +// CHECK: vbroadcastf32x2 1024(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x19,0xa2,0x00,0x04,0x00,0x00] + vbroadcastf32x2 1024(%rdx), %ymm28 + +// CHECK: vbroadcastf32x2 -1024(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x19,0x62,0x80] + vbroadcastf32x2 -1024(%rdx), %ymm28 + +// CHECK: vbroadcastf32x2 -1032(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x19,0xa2,0xf8,0xfb,0xff,0xff] + vbroadcastf32x2 -1032(%rdx), %ymm28 + +// CHECK: vbroadcastf32x2 %xmm26, %ymm19 +// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x19,0xda] + vbroadcastf32x2 %xmm26, %ymm19 + +// CHECK: vbroadcastf32x2 %xmm26, %ymm19 {%k7} +// CHECK: encoding: [0x62,0x82,0x7d,0x2f,0x19,0xda] + vbroadcastf32x2 %xmm26, %ymm19 {%k7} + +// CHECK: vbroadcastf32x2 %xmm26, %ymm19 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0xaf,0x19,0xda] + vbroadcastf32x2 %xmm26, %ymm19 {%k7} {z} + +// CHECK: vbroadcastf32x2 (%rcx), %ymm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x19,0x19] + vbroadcastf32x2 (%rcx), %ymm19 + +// CHECK: vbroadcastf32x2 4660(%rax,%r14,8), %ymm19 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x19,0x9c,0xf0,0x34,0x12,0x00,0x00] + vbroadcastf32x2 4660(%rax,%r14,8), %ymm19 + +// CHECK: vbroadcastf32x2 1016(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x19,0x5a,0x7f] + vbroadcastf32x2 1016(%rdx), %ymm19 + +// CHECK: vbroadcastf32x2 1024(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x19,0x9a,0x00,0x04,0x00,0x00] + vbroadcastf32x2 1024(%rdx), %ymm19 + +// CHECK: vbroadcastf32x2 -1024(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x19,0x5a,0x80] + vbroadcastf32x2 -1024(%rdx), %ymm19 + +// CHECK: vbroadcastf32x2 -1032(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x19,0x9a,0xf8,0xfb,0xff,0xff] + vbroadcastf32x2 -1032(%rdx), %ymm19 + -- 2.34.1