From: Elena Demikhovsky Date: Tue, 23 Jun 2015 08:19:46 +0000 (+0000) Subject: AVX-512: Added all forms of VPABS instruction X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d96e362b3f1c2b5b33f04df00a7abee415ebf006;p=oota-llvm.git AVX-512: Added all forms of VPABS instruction Added all intrinsics, tests for encoding, tests for intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240386 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 8e1d66849d1..8caedfaf048 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -1417,6 +1417,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [IntrNoMem]>; } + // Vector blend let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">, @@ -1981,12 +1982,78 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">, Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pabs_d_512 : GCCBuiltin<"__builtin_ia32_pabsd512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_pabs_q_512 : GCCBuiltin<"__builtin_ia32_pabsq512_mask">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, - llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_b_128 : + GCCBuiltin<"__builtin_ia32_pabsb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_b_256 : + GCCBuiltin<"__builtin_ia32_pabsb256_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_b_512 : + GCCBuiltin<"__builtin_ia32_pabsb512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_d_128 : + GCCBuiltin<"__builtin_ia32_pabsd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_d_256 : + GCCBuiltin<"__builtin_ia32_pabsd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_d_512 : + GCCBuiltin<"__builtin_ia32_pabsd512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_q_128 : + GCCBuiltin<"__builtin_ia32_pabsq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_q_256 : + GCCBuiltin<"__builtin_ia32_pabsq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_q_512 : + GCCBuiltin<"__builtin_ia32_pabsq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_w_128 : + GCCBuiltin<"__builtin_ia32_pabsw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_w_256 : + GCCBuiltin<"__builtin_ia32_pabsw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pabs_w_512 : + GCCBuiltin<"__builtin_ia32_pabsw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; } // Horizontal arithmetic ops diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d7d8e1e118e..ccf75f3319d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -18334,6 +18334,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UMIN: return "X86ISD::UMIN"; case X86ISD::SMAX: return "X86ISD::SMAX"; case X86ISD::SMIN: return "X86ISD::SMIN"; + case X86ISD::ABS: return "X86ISD::ABS"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND"; case X86ISD::FMIN: return "X86ISD::FMIN"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 9c98333776c..fdf0e591983 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -238,6 +238,9 @@ namespace llvm { /// Signed integer max and min. SMAX, SMIN, + // Integer absolute value + ABS, + /// Floating point max and min. FMAX, FMIN, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 56c5a639875..b4ccbfb0248 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -5612,77 +5612,6 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1, def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; -multiclass avx512_vpabs opc, string OpcodeStr, ValueType OpVT, - RegisterClass KRC, RegisterClass RC, - X86MemOperand x86memop, X86MemOperand x86scalar_mop, - string BrdcstStr> { - def rr : AVX5128I, EVEX; - def rrk : AVX5128I, EVEX, EVEX_K; - def rrkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def rmk : AVX5128I, EVEX, EVEX_K; - def rmkz : AVX5128I, EVEX, EVEX_KZ; - def rmb : AVX5128I, EVEX, EVEX_B; - def rmbk : AVX5128I, EVEX, EVEX_B, EVEX_K; - def rmbkz : AVX5128I, EVEX, EVEX_B, EVEX_KZ; - } -} - -defm VPABSDZ : avx512_vpabs<0x1E, "vpabsd", v16i32, VK16WM, VR512, - i512mem, i32mem, "{1to16}">, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPABSQZ : avx512_vpabs<0x1F, "vpabsq", v8i64, VK8WM, VR512, - i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - -def : Pat<(xor - (bc_v16i32 (v16i1sextv16i32)), - (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), - (VPABSDZrr VR512:$src)>; -def : Pat<(xor - (bc_v8i64 (v8i1sextv8i64)), - (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), - (VPABSQZrr VR512:$src)>; - -def : Pat<(v16i32 (int_x86_avx512_mask_pabs_d_512 (v16i32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1))), - (VPABSDZrr VR512:$src)>; -def : Pat<(v8i64 (int_x86_avx512_mask_pabs_q_512 (v8i64 VR512:$src), - (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPABSQZrr VR512:$src)>; - multiclass avx512_conflict opc, string OpcodeStr, RegisterClass RC, RegisterClass KRC, X86MemOperand x86memop, @@ -6165,3 +6094,91 @@ defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>, EVEX_CD8<64, CD8VF>, VEX_W; + +multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr : AVX512_maskable, EVEX, AVX5128IBase; + + let mayLoad = 1 in + defm rm : AVX512_maskable, + EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>; +} + +multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> : + avx512_unary_rm { + let mayLoad = 1 in + defm rmb : AVX512_maskable, + EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; +} + +multiclass avx512_unary_rm_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_unary_rm, EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_unary_rm, + EVEX_V256; + defm Z128 : avx512_unary_rm, + EVEX_V128; + } +} + +multiclass avx512_unary_rmb_vl opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_unary_rmb, + EVEX_V512; + + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_unary_rmb, + EVEX_V256; + defm Z128 : avx512_unary_rmb, + EVEX_V128; + } +} + +multiclass avx512_unary_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, + SDNode OpNode, Predicate prd> { + defm Q : avx512_unary_rmb_vl, VEX_W; + defm D : avx512_unary_rmb_vl; +} + +multiclass avx512_unary_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, + SDNode OpNode, Predicate prd> { + defm W : avx512_unary_rm_vl; + defm B : avx512_unary_rm_vl; +} + +multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, + bits<8> opc_d, bits<8> opc_q, + string OpcodeStr, SDNode OpNode> { + defm NAME : avx512_unary_rm_vl_dq, + avx512_unary_rm_vl_bw; +} + +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>; + +def : Pat<(xor + (bc_v16i32 (v16i1sextv16i32)), + (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))), + (VPABSDZrr VR512:$src)>; +def : Pat<(xor + (bc_v8i64 (v8i1sextv8i64)), + (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), + (VPABSQZrr VR512:$src)>; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index de3b3b6516a..6bf589f7bf7 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -251,6 +251,7 @@ def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; +def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index b03f2b3e38e..a15404ce780 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -429,6 +429,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), + X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0), diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 3495d505dbe..dc3dc0f72d5 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -489,19 +489,31 @@ declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double> } declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32) - define <16 x i32> @test_pabsd(<16 x i32> %a) { - ;CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0] - %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32>zeroinitializer, i16 -1) - ret < 16 x i32> %res - } declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) - define <8 x i64> @test_pabsq(<8 x i64> %a) { - ;CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0] - %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64>zeroinitializer, i8 -1) - ret <8 x i64> %res - } - declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsd{{.*}}{%k1} +define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { + %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsq{{.*}}{%k1} +define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { + %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) { ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1] diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 7c82d5f724f..9574c016ad5 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -969,4 +969,31 @@ define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> % %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) %res2 = add <64 x i8> %res, %res1 ret <64 x i8> %res2 -} \ No newline at end of file +} + +declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_512 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsw{{.*}}{%k1} +define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { + %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1) + %res2 = add <32 x i16> %res, %res1 + ret <32 x i16> %res2 +} + +declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_512 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsb{{.*}}{%k1} +define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { + %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1) + %res2 = add <64 x i8> %res, %res1 + ret <64 x i8> %res2 +} + diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 6272cc86c12..8a662048a64 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -3033,3 +3033,56 @@ define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> % %res2 = add <32 x i8> %res, %res1 ret <32 x i8> %res2 } + +declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsb{{.*}}{%k1} +define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { + %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) + %res2 = add <16 x i8> %res, %res1 + ret <16 x i8> %res2 +} + +declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsb{{.*}}{%k1} +define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { + %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) + %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1) + %res2 = add <32 x i8> %res, %res1 + ret <32 x i8> %res2 +} + +declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsw{{.*}}{%k1} +define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { + %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) + %res2 = add <8 x i16> %res, %res1 + ret <8 x i16> %res2 +} + +declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsw{{.*}}{%k1} +define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { + %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1) + %res2 = add <16 x i16> %res, %res1 + ret <16 x i16> %res2 +} + diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index b9f03389a8c..b2da994a868 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -2902,4 +2902,55 @@ define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, < ret <8 x float> %res2 } +declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsq{{.*}}{%k1} +define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { + %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) + %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) + %res2 = add <2 x i64> %res, %res1 + ret <2 x i64> %res2 +} + +declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsq{{.*}}{%k1} +define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { + %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) + %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) + %res2 = add <4 x i64> %res, %res1 + ret <4 x i64> %res2 +} + +declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsd{{.*}}{%k1} +define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { + %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) + %res2 = add <4 x i32> %res, %res1 + ret <4 x i32> %res2 +} + +declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsd{{.*}}{%k1} +define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { + %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index 05a7b1b82b6..9bd3081ff96 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -9681,3 +9681,115 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2 // CHECK: vexpandps %zmm9, %zmm14 {%k2} {z} // CHECK: encoding: [0x62,0x52,0x7d,0xca,0x88,0xf1] vexpandps %zmm9, %zmm14 {%k2} {z} + +// CHECK: vpabsd %zmm14, %zmm15 +// CHECK: encoding: [0x62,0x52,0x7d,0x48,0x1e,0xfe] + vpabsd %zmm14, %zmm15 + +// CHECK: vpabsd %zmm14, %zmm15 {%k6} +// CHECK: encoding: [0x62,0x52,0x7d,0x4e,0x1e,0xfe] + vpabsd %zmm14, %zmm15 {%k6} + +// CHECK: vpabsd %zmm14, %zmm15 {%k6} {z} +// CHECK: encoding: [0x62,0x52,0x7d,0xce,0x1e,0xfe] + vpabsd %zmm14, %zmm15 {%k6} {z} + +// CHECK: vpabsd (%rcx), %zmm15 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x1e,0x39] + vpabsd (%rcx), %zmm15 + +// CHECK: vpabsd 291(%rax,%r14,8), %zmm15 +// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x1e,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpabsd 291(%rax,%r14,8), %zmm15 + +// CHECK: vpabsd (%rcx){1to16}, %zmm15 +// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x1e,0x39] + vpabsd (%rcx){1to16}, %zmm15 + +// CHECK: vpabsd 8128(%rdx), %zmm15 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x1e,0x7a,0x7f] + vpabsd 8128(%rdx), %zmm15 + +// CHECK: vpabsd 8192(%rdx), %zmm15 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x1e,0xba,0x00,0x20,0x00,0x00] + vpabsd 8192(%rdx), %zmm15 + +// CHECK: vpabsd -8192(%rdx), %zmm15 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x1e,0x7a,0x80] + vpabsd -8192(%rdx), %zmm15 + +// CHECK: vpabsd -8256(%rdx), %zmm15 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x1e,0xba,0xc0,0xdf,0xff,0xff] + vpabsd -8256(%rdx), %zmm15 + +// CHECK: vpabsd 508(%rdx){1to16}, %zmm15 +// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x1e,0x7a,0x7f] + vpabsd 508(%rdx){1to16}, %zmm15 + +// CHECK: vpabsd 512(%rdx){1to16}, %zmm15 +// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x1e,0xba,0x00,0x02,0x00,0x00] + vpabsd 512(%rdx){1to16}, %zmm15 + +// CHECK: vpabsd -512(%rdx){1to16}, %zmm15 +// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x1e,0x7a,0x80] + vpabsd -512(%rdx){1to16}, %zmm15 + +// CHECK: vpabsd -516(%rdx){1to16}, %zmm15 +// CHECK: encoding: [0x62,0x72,0x7d,0x58,0x1e,0xba,0xfc,0xfd,0xff,0xff] + vpabsd -516(%rdx){1to16}, %zmm15 + +// CHECK: vpabsq %zmm24, %zmm5 +// CHECK: encoding: [0x62,0x92,0xfd,0x48,0x1f,0xe8] + vpabsq %zmm24, %zmm5 + +// CHECK: vpabsq %zmm24, %zmm5 {%k6} +// CHECK: encoding: [0x62,0x92,0xfd,0x4e,0x1f,0xe8] + vpabsq %zmm24, %zmm5 {%k6} + +// CHECK: vpabsq %zmm24, %zmm5 {%k6} {z} +// CHECK: encoding: [0x62,0x92,0xfd,0xce,0x1f,0xe8] + vpabsq %zmm24, %zmm5 {%k6} {z} + +// CHECK: vpabsq (%rcx), %zmm5 +// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x29] + vpabsq (%rcx), %zmm5 + +// CHECK: vpabsq 291(%rax,%r14,8), %zmm5 +// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x1f,0xac,0xf0,0x23,0x01,0x00,0x00] + vpabsq 291(%rax,%r14,8), %zmm5 + +// CHECK: vpabsq (%rcx){1to8}, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xfd,0x58,0x1f,0x29] + vpabsq (%rcx){1to8}, %zmm5 + +// CHECK: vpabsq 8128(%rdx), %zmm5 +// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x6a,0x7f] + vpabsq 8128(%rdx), %zmm5 + +// CHECK: vpabsq 8192(%rdx), %zmm5 +// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xaa,0x00,0x20,0x00,0x00] + vpabsq 8192(%rdx), %zmm5 + +// CHECK: vpabsq -8192(%rdx), %zmm5 +// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x6a,0x80] + vpabsq -8192(%rdx), %zmm5 + +// CHECK: vpabsq -8256(%rdx), %zmm5 +// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xaa,0xc0,0xdf,0xff,0xff] + vpabsq -8256(%rdx), %zmm5 + +// CHECK: vpabsq 1016(%rdx){1to8}, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xfd,0x58,0x1f,0x6a,0x7f] + vpabsq 1016(%rdx){1to8}, %zmm5 + +// CHECK: vpabsq 1024(%rdx){1to8}, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xfd,0x58,0x1f,0xaa,0x00,0x04,0x00,0x00] + vpabsq 1024(%rdx){1to8}, %zmm5 + +// CHECK: vpabsq -1024(%rdx){1to8}, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xfd,0x58,0x1f,0x6a,0x80] + vpabsq -1024(%rdx){1to8}, %zmm5 + +// CHECK: vpabsq -1032(%rdx){1to8}, %zmm5 +// CHECK: encoding: [0x62,0xf2,0xfd,0x58,0x1f,0xaa,0xf8,0xfb,0xff,0xff] + vpabsq -1032(%rdx){1to8}, %zmm5 diff --git a/test/MC/X86/avx512vl-encoding.s b/test/MC/X86/avx512vl-encoding.s index deae35f12a3..e3ec448737e 100644 --- a/test/MC/X86/avx512vl-encoding.s +++ b/test/MC/X86/avx512vl-encoding.s @@ -1227,3 +1227,228 @@ // CHECK: vexpandps %ymm29, %ymm29 {%k5} {z} // CHECK: encoding: [0x62,0x02,0x7d,0xad,0x88,0xed] vexpandps %ymm29, %ymm29 {%k5} {z} + +// CHECK: vpabsd %xmm19, %xmm28 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x1e,0xe3] + vpabsd %xmm19, %xmm28 + +// CHECK: vpabsd %xmm19, %xmm28 {%k6} +// CHECK: encoding: [0x62,0x22,0x7d,0x0e,0x1e,0xe3] + vpabsd %xmm19, %xmm28 {%k6} + +// CHECK: vpabsd %xmm19, %xmm28 {%k6} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0x8e,0x1e,0xe3] + vpabsd %xmm19, %xmm28 {%k6} {z} + +// CHECK: vpabsd (%rcx), %xmm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x1e,0x21] + vpabsd (%rcx), %xmm28 + +// CHECK: vpabsd 291(%rax,%r14,8), %xmm28 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x1e,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpabsd 291(%rax,%r14,8), %xmm28 + +// CHECK: vpabsd (%rcx){1to4}, %xmm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x1e,0x21] + vpabsd (%rcx){1to4}, %xmm28 + +// CHECK: vpabsd 2032(%rdx), %xmm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x1e,0x62,0x7f] + vpabsd 2032(%rdx), %xmm28 + +// CHECK: vpabsd 2048(%rdx), %xmm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x1e,0xa2,0x00,0x08,0x00,0x00] + vpabsd 2048(%rdx), %xmm28 + +// CHECK: vpabsd -2048(%rdx), %xmm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x1e,0x62,0x80] + vpabsd -2048(%rdx), %xmm28 + +// CHECK: vpabsd -2064(%rdx), %xmm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x1e,0xa2,0xf0,0xf7,0xff,0xff] + vpabsd -2064(%rdx), %xmm28 + +// CHECK: vpabsd 508(%rdx){1to4}, %xmm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x1e,0x62,0x7f] + vpabsd 508(%rdx){1to4}, %xmm28 + +// CHECK: vpabsd 512(%rdx){1to4}, %xmm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x1e,0xa2,0x00,0x02,0x00,0x00] + vpabsd 512(%rdx){1to4}, %xmm28 + +// CHECK: vpabsd -512(%rdx){1to4}, %xmm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x1e,0x62,0x80] + vpabsd -512(%rdx){1to4}, %xmm28 + +// CHECK: vpabsd -516(%rdx){1to4}, %xmm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x1e,0xa2,0xfc,0xfd,0xff,0xff] + vpabsd -516(%rdx){1to4}, %xmm28 + +// CHECK: vpabsd %ymm18, %ymm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x1e,0xca] + vpabsd %ymm18, %ymm25 + +// CHECK: vpabsd %ymm18, %ymm25 {%k2} +// CHECK: encoding: [0x62,0x22,0x7d,0x2a,0x1e,0xca] + vpabsd %ymm18, %ymm25 {%k2} + +// CHECK: vpabsd %ymm18, %ymm25 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xaa,0x1e,0xca] + vpabsd %ymm18, %ymm25 {%k2} {z} + +// CHECK: vpabsd (%rcx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x1e,0x09] + vpabsd (%rcx), %ymm25 + +// CHECK: vpabsd 291(%rax,%r14,8), %ymm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x1e,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpabsd 291(%rax,%r14,8), %ymm25 + +// CHECK: vpabsd (%rcx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x1e,0x09] + vpabsd (%rcx){1to8}, %ymm25 + +// CHECK: vpabsd 4064(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x1e,0x4a,0x7f] + vpabsd 4064(%rdx), %ymm25 + +// CHECK: vpabsd 4096(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x1e,0x8a,0x00,0x10,0x00,0x00] + vpabsd 4096(%rdx), %ymm25 + +// CHECK: vpabsd -4096(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x1e,0x4a,0x80] + vpabsd -4096(%rdx), %ymm25 + +// CHECK: vpabsd -4128(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x1e,0x8a,0xe0,0xef,0xff,0xff] + vpabsd -4128(%rdx), %ymm25 + +// CHECK: vpabsd 508(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x1e,0x4a,0x7f] + vpabsd 508(%rdx){1to8}, %ymm25 + +// CHECK: vpabsd 512(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x1e,0x8a,0x00,0x02,0x00,0x00] + vpabsd 512(%rdx){1to8}, %ymm25 + +// CHECK: vpabsd -512(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x1e,0x4a,0x80] + vpabsd -512(%rdx){1to8}, %ymm25 + +// CHECK: vpabsd -516(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x1e,0x8a,0xfc,0xfd,0xff,0xff] + vpabsd -516(%rdx){1to8}, %ymm25 + +// CHECK: vpabsq %xmm22, %xmm19 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x1f,0xde] + vpabsq %xmm22, %xmm19 + +// CHECK: vpabsq %xmm22, %xmm19 {%k2} +// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x1f,0xde] + vpabsq %xmm22, %xmm19 {%k2} + +// CHECK: vpabsq %xmm22, %xmm19 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0x8a,0x1f,0xde] + vpabsq %xmm22, %xmm19 {%k2} {z} + +// CHECK: vpabsq (%rcx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x19] + vpabsq (%rcx), %xmm19 + +// CHECK: vpabsq 291(%rax,%r14,8), %xmm19 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x1f,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpabsq 291(%rax,%r14,8), %xmm19 + +// CHECK: vpabsq (%rcx){1to2}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x19] + vpabsq (%rcx){1to2}, %xmm19 + +// CHECK: vpabsq 2032(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x5a,0x7f] + vpabsq 2032(%rdx), %xmm19 + +// CHECK: vpabsq 2048(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x9a,0x00,0x08,0x00,0x00] + vpabsq 2048(%rdx), %xmm19 + +// CHECK: vpabsq -2048(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x5a,0x80] + vpabsq -2048(%rdx), %xmm19 + +// CHECK: vpabsq -2064(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x1f,0x9a,0xf0,0xf7,0xff,0xff] + vpabsq -2064(%rdx), %xmm19 + +// CHECK: vpabsq 1016(%rdx){1to2}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x5a,0x7f] + vpabsq 1016(%rdx){1to2}, %xmm19 + +// CHECK: vpabsq 1024(%rdx){1to2}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x9a,0x00,0x04,0x00,0x00] + vpabsq 1024(%rdx){1to2}, %xmm19 + +// CHECK: vpabsq -1024(%rdx){1to2}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x5a,0x80] + vpabsq -1024(%rdx){1to2}, %xmm19 + +// CHECK: vpabsq -1032(%rdx){1to2}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x1f,0x9a,0xf8,0xfb,0xff,0xff] + vpabsq -1032(%rdx){1to2}, %xmm19 + +// CHECK: vpabsq %ymm17, %ymm22 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x1f,0xf1] + vpabsq %ymm17, %ymm22 + +// CHECK: vpabsq %ymm17, %ymm22 {%k6} +// CHECK: encoding: [0x62,0xa2,0xfd,0x2e,0x1f,0xf1] + vpabsq %ymm17, %ymm22 {%k6} + +// CHECK: vpabsq %ymm17, %ymm22 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0xae,0x1f,0xf1] + vpabsq %ymm17, %ymm22 {%k6} {z} + +// CHECK: vpabsq (%rcx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x1f,0x31] + vpabsq (%rcx), %ymm22 + +// CHECK: vpabsq 291(%rax,%r14,8), %ymm22 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x1f,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpabsq 291(%rax,%r14,8), %ymm22 + +// CHECK: vpabsq (%rcx){1to4}, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x1f,0x31] + vpabsq (%rcx){1to4}, %ymm22 + +// CHECK: vpabsq 4064(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x1f,0x72,0x7f] + vpabsq 4064(%rdx), %ymm22 + +// CHECK: vpabsq 4096(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x1f,0xb2,0x00,0x10,0x00,0x00] + vpabsq 4096(%rdx), %ymm22 + +// CHECK: vpabsq -4096(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x1f,0x72,0x80] + vpabsq -4096(%rdx), %ymm22 + +// CHECK: vpabsq -4128(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x1f,0xb2,0xe0,0xef,0xff,0xff] + vpabsq -4128(%rdx), %ymm22 + +// CHECK: vpabsq 1016(%rdx){1to4}, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x1f,0x72,0x7f] + vpabsq 1016(%rdx){1to4}, %ymm22 + +// CHECK: vpabsq 1024(%rdx){1to4}, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x1f,0xb2,0x00,0x04,0x00,0x00] + vpabsq 1024(%rdx){1to4}, %ymm22 + +// CHECK: vpabsq -1024(%rdx){1to4}, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x1f,0x72,0x80] + vpabsq -1024(%rdx){1to4}, %ymm22 + +// CHECK: vpabsq -1032(%rdx){1to4}, %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x1f,0xb2,0xf8,0xfb,0xff,0xff] + vpabsq -1032(%rdx){1to4}, %ymm22 + diff --git a/test/MC/X86/x86-64-avx512bw.s b/test/MC/X86/x86-64-avx512bw.s index 5473124540e..fc6df8c2d40 100644 --- a/test/MC/X86/x86-64-avx512bw.s +++ b/test/MC/X86/x86-64-avx512bw.s @@ -3595,3 +3595,75 @@ // CHECK: vpshufb -8256(%rdx), %zmm26, %zmm22 // CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0xc0,0xdf,0xff,0xff] vpshufb -8256(%rdx), %zmm26, %zmm22 + +// CHECK: vpabsb %zmm27, %zmm17 +// CHECK: encoding: [0x62,0x82,0x7d,0x48,0x1c,0xcb] + vpabsb %zmm27, %zmm17 + +// CHECK: vpabsb %zmm27, %zmm17 {%k7} +// CHECK: encoding: [0x62,0x82,0x7d,0x4f,0x1c,0xcb] + vpabsb %zmm27, %zmm17 {%k7} + +// CHECK: vpabsb %zmm27, %zmm17 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0xcf,0x1c,0xcb] + vpabsb %zmm27, %zmm17 {%k7} {z} + +// CHECK: vpabsb (%rcx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x09] + vpabsb (%rcx), %zmm17 + +// CHECK: vpabsb 291(%rax,%r14,8), %zmm17 +// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x1c,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpabsb 291(%rax,%r14,8), %zmm17 + +// CHECK: vpabsb 8128(%rdx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x4a,0x7f] + vpabsb 8128(%rdx), %zmm17 + +// CHECK: vpabsb 8192(%rdx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x8a,0x00,0x20,0x00,0x00] + vpabsb 8192(%rdx), %zmm17 + +// CHECK: vpabsb -8192(%rdx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x4a,0x80] + vpabsb -8192(%rdx), %zmm17 + +// CHECK: vpabsb -8256(%rdx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x1c,0x8a,0xc0,0xdf,0xff,0xff] + vpabsb -8256(%rdx), %zmm17 + +// CHECK: vpabsw %zmm24, %zmm30 +// CHECK: encoding: [0x62,0x02,0x7d,0x48,0x1d,0xf0] + vpabsw %zmm24, %zmm30 + +// CHECK: vpabsw %zmm24, %zmm30 {%k6} +// CHECK: encoding: [0x62,0x02,0x7d,0x4e,0x1d,0xf0] + vpabsw %zmm24, %zmm30 {%k6} + +// CHECK: vpabsw %zmm24, %zmm30 {%k6} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0xce,0x1d,0xf0] + vpabsw %zmm24, %zmm30 {%k6} {z} + +// CHECK: vpabsw (%rcx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x1d,0x31] + vpabsw (%rcx), %zmm30 + +// CHECK: vpabsw 291(%rax,%r14,8), %zmm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x1d,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpabsw 291(%rax,%r14,8), %zmm30 + +// CHECK: vpabsw 8128(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x1d,0x72,0x7f] + vpabsw 8128(%rdx), %zmm30 + +// CHECK: vpabsw 8192(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x1d,0xb2,0x00,0x20,0x00,0x00] + vpabsw 8192(%rdx), %zmm30 + +// CHECK: vpabsw -8192(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x1d,0x72,0x80] + vpabsw -8192(%rdx), %zmm30 + +// CHECK: vpabsw -8256(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x1d,0xb2,0xc0,0xdf,0xff,0xff] + vpabsw -8256(%rdx), %zmm30