AVX512: Implemented encoding and intrinsics for VBROADCASTI32x2 and VBROADCASTF32x2...
author Igor Breger <igor.breger@intel.com>
Mon, 2 Nov 2015 07:39:36 +0000 (07:39 +0000)
committer Igor Breger <igor.breger@intel.com>
Mon, 2 Nov 2015 07:39:36 +0000 (07:39 +0000)
Differential Revision: http://reviews.llvm.org/D14216

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251781 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsX86.td
lib/Target/X86/X86InstrAVX512.td
lib/Target/X86/X86InstrFragmentsSIMD.td
lib/Target/X86/X86IntrinsicsInfo.h
test/CodeGen/X86/avx512dq-intrinsics.ll
test/CodeGen/X86/avx512dqvl-intrinsics.ll
test/MC/X86/x86-64-avx512dq.s
test/MC/X86/x86-64-avx512dq_vl.s

index 03dbb216ad91c4bacdb07e2c4d9f1ea5e5bb9f45..4a8c0818196a33e3b123a034cb123c2b74602e7b 100644 (file)
@@ -4886,6 +4886,31 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           Intrinsic<[llvm_v8i64_ty],
                     [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
 
+  def int_x86_avx512_mask_broadcastf32x2_256 :
+          GCCBuiltin<"__builtin_ia32_broadcastf32x2_256_mask">,
+          Intrinsic<[llvm_v8f32_ty],
+                    [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_broadcastf32x2_512 :
+          GCCBuiltin<"__builtin_ia32_broadcastf32x2_512_mask">,
+          Intrinsic<[llvm_v16f32_ty],
+                    [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_broadcasti32x2_128 :
+          GCCBuiltin<"__builtin_ia32_broadcasti32x2_128_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_broadcasti32x2_256 :
+          GCCBuiltin<"__builtin_ia32_broadcasti32x2_256_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+                    [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_broadcasti32x2_512 :
+          GCCBuiltin<"__builtin_ia32_broadcasti32x2_512_mask">,
+          Intrinsic<[llvm_v16i32_ty],
+                    [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+
   def int_x86_avx512_pbroadcastd_i32_512 :
          Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>;
 
index 60ba687da62c8960930c5b722dcebf7a052bf422..64c6e8dd7e904f50897f1d5c20abd28c4f64d27e 100644 (file)
@@ -1047,6 +1047,46 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
 }
 
+multiclass avx512_broadcast_32x2<bits<8> opc, string OpcodeStr,
+                                 X86VectorVTInfo _Dst, X86VectorVTInfo _Src,
+                                 SDNode OpNode = X86SubVBroadcast> {
+
+  defm r : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
+                   (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
+                   (_Dst.VT (OpNode (_Src.VT _Src.RC:$src)))>,
+                   T8PD, EVEX;
+  let mayLoad = 1 in
+    defm m : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+                   (ins _Src.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
+                   (_Dst.VT (OpNode
+                              (_Src.VT (scalar_to_vector(loadi64 addr:$src)))))>,
+                   T8PD, EVEX, EVEX_CD8<_Src.EltSize, CD8VT2>;
+}
+
+multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
+                             AVX512VLVectorVTInfo _> {
+  let Predicates = [HasDQI] in
+    defm Z :    avx512_broadcast_32x2<opc, OpcodeStr, _.info512, _.info128>,
+                                  EVEX_V512;
+  let Predicates = [HasDQI, HasVLX] in
+    defm Z256 : avx512_broadcast_32x2<opc, OpcodeStr, _.info256, _.info128>,
+                                  EVEX_V256;
+}
+
+multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
+                                                       AVX512VLVectorVTInfo _> :
+  avx512_common_broadcast_32x2<opc, OpcodeStr, _> {
+
+  let Predicates = [HasDQI, HasVLX] in
+    defm Z128 : avx512_broadcast_32x2<opc, OpcodeStr, _.info128, _.info128,
+                                      X86SubV32x2Broadcast>, EVEX_V128;
+}
+
+defm VPBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
+                                           avx512vl_i32_info>;
+defm VPBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
+                                           avx512vl_f32_info>;
+
 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
           (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
index d7c90c1780aa847a24cf8732d95b4dcf1ef49685..90710bfdfc04d796453c9a0e1c289f1a632a500c 100644 (file)
@@ -382,6 +382,10 @@ def X86Vfpclasss   : SDNode<"X86ISD::VFPCLASS", SDTypeProfile<1, 2, [SDTCisInt<0
 def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
                     SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                          SDTCisSubVecOfVec<1, 0>]>, []>;
+// SDTCisSubVecOfVec restriction cannot be applied for 128 bit version of VBROADCASTI32x2.
+def X86SubV32x2Broadcast : SDNode<"X86ISD::SUBV_BROADCAST",
+                    SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>, []>;
+
 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
 def X86Vinsert   : SDNode<"X86ISD::VINSERT",  SDTypeProfile<1, 3,
                               [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
index 3c3e3316d37e0bcdbc1d55d5308cf09e42655a22..7e7dc3a9e61cb8edec2bc66aa78047ac2a11b63c 100644 (file)
@@ -420,6 +420,16 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_blend_w_128,  BLEND, X86ISD::SELECT, 0),
   X86_INTRINSIC_DATA(avx512_mask_blend_w_256,  BLEND, X86ISD::SELECT, 0),
   X86_INTRINSIC_DATA(avx512_mask_blend_w_512,  BLEND, X86ISD::SELECT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::SUBV_BROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::SUBV_BROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::SUBV_BROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::SUBV_BROADCAST, 0),
+  X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::SUBV_BROADCAST, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_b_128,  CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_b_256,  CMP_MASK_CC, X86ISD::CMPM, 0),
   X86_INTRINSIC_DATA(avx512_mask_cmp_b_512,  CMP_MASK_CC, X86ISD::CMPM, 0),
index b36f1ef52c18834aa1efb25d490dff01f9d3394b..c25073aaad45a52cd19dca3cbba85c4a10d30374 100644 (file)
@@ -501,3 +501,43 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
   %res2 = add i8 %res, %res1
   ret i8 %res2
 }
+
+declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm0
+; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>  %x0, <16 x float> %x2, i16 %x3)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
+  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
+  %res3 = fadd <16 x float> %res, %res1
+  %res4 = fadd <16 x float> %res3, %res2
+  ret <16 x float> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edi, %k1
+; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm0
+; CHECK-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>  %x0, <16 x i32> %x2, i16 %x3)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
+  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
+  %res3 = add <16 x i32> %res, %res1
+  %res4 = add <16 x i32> %res3, %res2
+  ret <16 x i32> %res4
+}
index 22cfe1924b6b514e125465a977d5b5f8f5a1294e..598348fe6c5b588c7aba06638c03f3b5d7f528fa 100644 (file)
@@ -1772,3 +1772,64 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) {
   %res2 = add i8 %res, %res1
   ret i8 %res2
 }
+
+declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_broadcastf32x2_256(<4 x float> %x0, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %ymm0
+; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>  %x0, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %x3)
+  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
+  %res3 = fadd <8 x float> %res, %res1
+  %res4 = fadd <8 x float> %res3, %res2
+  ret <8 x float> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %ymm1 {%k1}
+; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %ymm0
+; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>  %x0, <8 x i32> %x2, i8 %x3)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %x3)
+  %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
+  %res3 = add <8 x i32> %res, %res1
+  %res4 = add <8 x i32> %res3, %res2
+  ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>  %x0, <4 x i32> %x2, i8 %x3)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x3)
+  %res2 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 -1)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res3, %res2
+  ret <4 x i32> %res4
+}
+
index 5c9f579e97c8f4578cd9ae5d1faf989e95957205..d0b91d69ebde7fd7941c66b5c531d5f46a0f08ff 100644 (file)
 // CHECK: vfpclassss $123, -516(%rdx), %k4
 // CHECK:  encoding: [0x62,0xf3,0x7d,0x08,0x67,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
           vfpclassss $0x7b,-516(%rdx), %k4
+
+// CHECK: vbroadcasti32x2 %xmm31, %zmm30
+// CHECK:  encoding: [0x62,0x02,0x7d,0x48,0x59,0xf7]
+          vbroadcasti32x2 %xmm31, %zmm30
+
+// CHECK: vbroadcasti32x2 %xmm31, %zmm30 {%k5}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x4d,0x59,0xf7]
+          vbroadcasti32x2 %xmm31, %zmm30 {%k5}
+
+// CHECK: vbroadcasti32x2 %xmm31, %zmm30 {%k5} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xcd,0x59,0xf7]
+          vbroadcasti32x2 %xmm31, %zmm30 {%k5} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x59,0x31]
+          vbroadcasti32x2 (%rcx), %zmm30
+
+// CHECK: vbroadcasti32x2 291(%rax,%r14,8), %zmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcasti32x2 291(%rax,%r14,8), %zmm30
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x59,0x72,0x7f]
+          vbroadcasti32x2 1016(%rdx), %zmm30
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x59,0xb2,0x00,0x04,0x00,0x00]
+          vbroadcasti32x2 1024(%rdx), %zmm30
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x59,0x72,0x80]
+          vbroadcasti32x2 -1024(%rdx), %zmm30
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x59,0xb2,0xf8,0xfb,0xff,0xff]
+          vbroadcasti32x2 -1032(%rdx), %zmm30
+
+// CHECK: vbroadcasti32x2 %xmm17, %zmm20
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x48,0x59,0xe1]
+          vbroadcasti32x2 %xmm17, %zmm20
+
+// CHECK: vbroadcasti32x2 %xmm17, %zmm20 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x49,0x59,0xe1]
+          vbroadcasti32x2 %xmm17, %zmm20 {%k1}
+
+// CHECK: vbroadcasti32x2 %xmm17, %zmm20 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0xc9,0x59,0xe1]
+          vbroadcasti32x2 %xmm17, %zmm20 {%k1} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x59,0x21]
+          vbroadcasti32x2 (%rcx), %zmm20
+
+// CHECK: vbroadcasti32x2 4660(%rax,%r14,8), %zmm20
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x48,0x59,0xa4,0xf0,0x34,0x12,0x00,0x00]
+          vbroadcasti32x2 4660(%rax,%r14,8), %zmm20
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x59,0x62,0x7f]
+          vbroadcasti32x2 1016(%rdx), %zmm20
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x59,0xa2,0x00,0x04,0x00,0x00]
+          vbroadcasti32x2 1024(%rdx), %zmm20
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x59,0x62,0x80]
+          vbroadcasti32x2 -1024(%rdx), %zmm20
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %zmm20
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x59,0xa2,0xf8,0xfb,0xff,0xff]
+          vbroadcasti32x2 -1032(%rdx), %zmm20
+
+// CHECK: vbroadcastf32x2 %xmm23, %zmm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x19,0xdf]
+          vbroadcastf32x2 %xmm23, %zmm27
+
+// CHECK: vbroadcastf32x2 %xmm23, %zmm27 {%k6}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x4e,0x19,0xdf]
+          vbroadcastf32x2 %xmm23, %zmm27 {%k6}
+
+// CHECK: vbroadcastf32x2 %xmm23, %zmm27 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xce,0x19,0xdf]
+          vbroadcastf32x2 %xmm23, %zmm27 {%k6} {z}
+
+// CHECK: vbroadcastf32x2 (%rcx), %zmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x19,0x19]
+          vbroadcastf32x2 (%rcx), %zmm27
+
+// CHECK: vbroadcastf32x2 291(%rax,%r14,8), %zmm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x19,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastf32x2 291(%rax,%r14,8), %zmm27
+
+// CHECK: vbroadcastf32x2 1016(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x7f]
+          vbroadcastf32x2 1016(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 1024(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0x00,0x04,0x00,0x00]
+          vbroadcastf32x2 1024(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 -1024(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x80]
+          vbroadcastf32x2 -1024(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 -1032(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0xf8,0xfb,0xff,0xff]
+          vbroadcastf32x2 -1032(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 %xmm21, %zmm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x19,0xdd]
+          vbroadcastf32x2 %xmm21, %zmm27
+
+// CHECK: vbroadcastf32x2 %xmm21, %zmm27 {%k5}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x4d,0x19,0xdd]
+          vbroadcastf32x2 %xmm21, %zmm27 {%k5}
+
+// CHECK: vbroadcastf32x2 %xmm21, %zmm27 {%k5} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xcd,0x19,0xdd]
+          vbroadcastf32x2 %xmm21, %zmm27 {%k5} {z}
+
+// CHECK: vbroadcastf32x2 (%rcx), %zmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x19,0x19]
+          vbroadcastf32x2 (%rcx), %zmm27
+
+// CHECK: vbroadcastf32x2 4660(%rax,%r14,8), %zmm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x19,0x9c,0xf0,0x34,0x12,0x00,0x00]
+          vbroadcastf32x2 4660(%rax,%r14,8), %zmm27
+
+// CHECK: vbroadcastf32x2 1016(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x7f]
+          vbroadcastf32x2 1016(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 1024(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0x00,0x04,0x00,0x00]
+          vbroadcastf32x2 1024(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 -1024(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x80]
+          vbroadcastf32x2 -1024(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 -1032(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0xf8,0xfb,0xff,0xff]
+          vbroadcastf32x2 -1032(%rdx), %zmm27
+
index 54b77e0219032bf5c2bd3570a0fe47a2a140019b..eef6b0cf33e2fff99fb2c99a5ff92fcbf3aba992 100644 (file)
 // CHECK: vfpclasspsl $123, -516(%rdx){1to8}, %k3
 // CHECK:  encoding: [0x62,0xf3,0x7d,0x38,0x66,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
           vfpclasspsl $0x7b,-516(%rdx){1to8}, %k3
+
+// CHECK: vbroadcasti32x2 %xmm30, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x59,0xf6]
+          vbroadcasti32x2 %xmm30, %xmm30
+
+// CHECK: vbroadcasti32x2 %xmm30, %xmm30 {%k2}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x0a,0x59,0xf6]
+          vbroadcasti32x2 %xmm30, %xmm30 {%k2}
+
+// CHECK: vbroadcasti32x2 %xmm30, %xmm30 {%k2} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x8a,0x59,0xf6]
+          vbroadcasti32x2 %xmm30, %xmm30 {%k2} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x59,0x31]
+          vbroadcasti32x2 (%rcx), %xmm30
+
+// CHECK: vbroadcasti32x2 291(%rax,%r14,8), %xmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcasti32x2 291(%rax,%r14,8), %xmm30
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x7f]
+          vbroadcasti32x2 1016(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0x00,0x04,0x00,0x00]
+          vbroadcasti32x2 1024(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x80]
+          vbroadcasti32x2 -1024(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0xf8,0xfb,0xff,0xff]
+          vbroadcasti32x2 -1032(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm26
+// CHECK:  encoding: [0x62,0x02,0x7d,0x28,0x59,0xd4]
+          vbroadcasti32x2 %xmm28, %ymm26
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm26 {%k7}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x2f,0x59,0xd4]
+          vbroadcasti32x2 %xmm28, %ymm26 {%k7}
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm26 {%k7} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xaf,0x59,0xd4]
+          vbroadcasti32x2 %xmm28, %ymm26 {%k7} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x59,0x11]
+          vbroadcasti32x2 (%rcx), %ymm26
+
+// CHECK: vbroadcasti32x2 291(%rax,%r14,8), %ymm26
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x59,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcasti32x2 291(%rax,%r14,8), %ymm26
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x59,0x52,0x7f]
+          vbroadcasti32x2 1016(%rdx), %ymm26
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x59,0x92,0x00,0x04,0x00,0x00]
+          vbroadcasti32x2 1024(%rdx), %ymm26
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x59,0x52,0x80]
+          vbroadcasti32x2 -1024(%rdx), %ymm26
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x59,0x92,0xf8,0xfb,0xff,0xff]
+          vbroadcasti32x2 -1032(%rdx), %ymm26
+
+// CHECK: vbroadcasti32x2 %xmm28, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x59,0xf4]
+          vbroadcasti32x2 %xmm28, %xmm30
+
+// CHECK: vbroadcasti32x2 %xmm28, %xmm30 {%k6}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x0e,0x59,0xf4]
+          vbroadcasti32x2 %xmm28, %xmm30 {%k6}
+
+// CHECK: vbroadcasti32x2 %xmm28, %xmm30 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x8e,0x59,0xf4]
+          vbroadcasti32x2 %xmm28, %xmm30 {%k6} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x59,0x31]
+          vbroadcasti32x2 (%rcx), %xmm30
+
+// CHECK: vbroadcasti32x2 4660(%rax,%r14,8), %xmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x59,0xb4,0xf0,0x34,0x12,0x00,0x00]
+          vbroadcasti32x2 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x7f]
+          vbroadcasti32x2 1016(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0x00,0x04,0x00,0x00]
+          vbroadcasti32x2 1024(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x80]
+          vbroadcasti32x2 -1024(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0xf8,0xfb,0xff,0xff]
+          vbroadcasti32x2 -1032(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm23
+// CHECK:  encoding: [0x62,0x82,0x7d,0x28,0x59,0xfc]
+          vbroadcasti32x2 %xmm28, %ymm23
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm23 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x29,0x59,0xfc]
+          vbroadcasti32x2 %xmm28, %ymm23 {%k1}
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm23 {%k1} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0xa9,0x59,0xfc]
+          vbroadcasti32x2 %xmm28, %ymm23 {%k1} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x59,0x39]
+          vbroadcasti32x2 (%rcx), %ymm23
+
+// CHECK: vbroadcasti32x2 4660(%rax,%r14,8), %ymm23
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x59,0xbc,0xf0,0x34,0x12,0x00,0x00]
+          vbroadcasti32x2 4660(%rax,%r14,8), %ymm23
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x59,0x7a,0x7f]
+          vbroadcasti32x2 1016(%rdx), %ymm23
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x59,0xba,0x00,0x04,0x00,0x00]
+          vbroadcasti32x2 1024(%rdx), %ymm23
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x59,0x7a,0x80]
+          vbroadcasti32x2 -1024(%rdx), %ymm23
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x59,0xba,0xf8,0xfb,0xff,0xff]
+          vbroadcasti32x2 -1032(%rdx), %ymm23
+
+// CHECK: vbroadcastf32x2 %xmm18, %ymm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x19,0xe2]
+          vbroadcastf32x2 %xmm18, %ymm28
+
+// CHECK: vbroadcastf32x2 %xmm18, %ymm28 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2f,0x19,0xe2]
+          vbroadcastf32x2 %xmm18, %ymm28 {%k7}
+
+// CHECK: vbroadcastf32x2 %xmm18, %ymm28 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xaf,0x19,0xe2]
+          vbroadcastf32x2 %xmm18, %ymm28 {%k7} {z}
+
+// CHECK: vbroadcastf32x2 (%rcx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x19,0x21]
+          vbroadcastf32x2 (%rcx), %ymm28
+
+// CHECK: vbroadcastf32x2 291(%rax,%r14,8), %ymm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x19,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastf32x2 291(%rax,%r14,8), %ymm28
+
+// CHECK: vbroadcastf32x2 1016(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x19,0x62,0x7f]
+          vbroadcastf32x2 1016(%rdx), %ymm28
+
+// CHECK: vbroadcastf32x2 1024(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x19,0xa2,0x00,0x04,0x00,0x00]
+          vbroadcastf32x2 1024(%rdx), %ymm28
+
+// CHECK: vbroadcastf32x2 -1024(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x19,0x62,0x80]
+          vbroadcastf32x2 -1024(%rdx), %ymm28
+
+// CHECK: vbroadcastf32x2 -1032(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x19,0xa2,0xf8,0xfb,0xff,0xff]
+          vbroadcastf32x2 -1032(%rdx), %ymm28
+
+// CHECK: vbroadcastf32x2 %xmm26, %ymm19
+// CHECK:  encoding: [0x62,0x82,0x7d,0x28,0x19,0xda]
+          vbroadcastf32x2 %xmm26, %ymm19
+
+// CHECK: vbroadcastf32x2 %xmm26, %ymm19 {%k7}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x2f,0x19,0xda]
+          vbroadcastf32x2 %xmm26, %ymm19 {%k7}
+
+// CHECK: vbroadcastf32x2 %xmm26, %ymm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0xaf,0x19,0xda]
+          vbroadcastf32x2 %xmm26, %ymm19 {%k7} {z}
+
+// CHECK: vbroadcastf32x2 (%rcx), %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x19,0x19]
+          vbroadcastf32x2 (%rcx), %ymm19
+
+// CHECK: vbroadcastf32x2 4660(%rax,%r14,8), %ymm19
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x19,0x9c,0xf0,0x34,0x12,0x00,0x00]
+          vbroadcastf32x2 4660(%rax,%r14,8), %ymm19
+
+// CHECK: vbroadcastf32x2 1016(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x19,0x5a,0x7f]
+          vbroadcastf32x2 1016(%rdx), %ymm19
+
+// CHECK: vbroadcastf32x2 1024(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x19,0x9a,0x00,0x04,0x00,0x00]
+          vbroadcastf32x2 1024(%rdx), %ymm19
+
+// CHECK: vbroadcastf32x2 -1024(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x19,0x5a,0x80]
+          vbroadcastf32x2 -1024(%rdx), %ymm19
+
+// CHECK: vbroadcastf32x2 -1032(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x19,0x9a,0xf8,0xfb,0xff,0xff]
+          vbroadcastf32x2 -1032(%rdx), %ymm19
+