From: Elena Demikhovsky Date: Mon, 22 Jun 2015 13:00:42 +0000 (+0000) Subject: AVX-512: added VPSHUFB instruction - all SKX forms X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=114489ab2446e4ebff595d7f99953f6ebcb020a9;p=oota-llvm.git AVX-512: added VPSHUFB instruction - all SKX forms Added intrinsics and encoding tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240277 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index ded4a7af7bc..8e1d66849d1 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -1397,6 +1397,24 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pshuf_b_128 : + GCCBuiltin<"__builtin_ia32_pshufb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshuf_b_256 : + GCCBuiltin<"__builtin_ia32_pshufb256_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_pshuf_b_512 : + GCCBuiltin<"__builtin_ia32_pshufb512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; } // Vector blend diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index c60b11cbe31..56c5a639875 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3870,6 +3870,19 @@ defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W; defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W; + +multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> { + let Predicates = [HasBWI] 
in + defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512; + + let Predicates = [HasVLX, HasBWI] in { + defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256; + defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128; + } +} + +defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>; + //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index b3007906886..b03f2b3e38e 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -582,6 +582,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFB, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFB, 0), + X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK, + X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0), diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 06d44872d1e..7c82d5f724f 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -957,3 +957,16 @@ define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %res2 = add <32 x i16> %res, %res1 ret <32 x i16> %res2 } + +declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512 +; CHECK-NOT: call +; CHECK: kmov +; 
CHECK: vpshufb %zmm{{.*}}{%k1} +define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { + %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + %res2 = add <64 x i8> %res, %res1 + ret <64 x i8> %res2 +} \ No newline at end of file diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 6cd6c1de805..6272cc86c12 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -3007,3 +3007,29 @@ define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %res2 = add <16 x i16> %res, %res1 ret <16 x i16> %res2 } + +declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_128 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpshufb %xmm{{.*}}{%k1} +define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { + %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) + %res2 = add <16 x i8> %res, %res1 + ret <16 x i8> %res2 +} + +declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_256 +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpshufb %ymm{{.*}}{%k1} +define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { + %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) + %res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x 
i8> %x2, i32 -1) + %res2 = add <32 x i8> %res, %res1 + ret <32 x i8> %res2 +} diff --git a/test/MC/X86/x86-64-avx512bw.s b/test/MC/X86/x86-64-avx512bw.s index 45e746308cb..5473124540e 100644 --- a/test/MC/X86/x86-64-avx512bw.s +++ b/test/MC/X86/x86-64-avx512bw.s @@ -3560,3 +3560,38 @@ // CHECK: encoding: [0x62,0x61,0x15,0x40,0xe3,0xaa,0xc0,0xdf,0xff,0xff] vpavgw -8256(%rdx), %zmm29, %zmm29 +// CHECK: vpshufb %zmm20, %zmm26, %zmm22 +// CHECK: encoding: [0x62,0xa2,0x2d,0x40,0x00,0xf4] + vpshufb %zmm20, %zmm26, %zmm22 + +// CHECK: vpshufb %zmm20, %zmm26, %zmm22 {%k7} +// CHECK: encoding: [0x62,0xa2,0x2d,0x47,0x00,0xf4] + vpshufb %zmm20, %zmm26, %zmm22 {%k7} + +// CHECK: vpshufb %zmm20, %zmm26, %zmm22 {%k7} {z} +// CHECK: encoding: [0x62,0xa2,0x2d,0xc7,0x00,0xf4] + vpshufb %zmm20, %zmm26, %zmm22 {%k7} {z} + +// CHECK: vpshufb (%rcx), %zmm26, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x31] + vpshufb (%rcx), %zmm26, %zmm22 + +// CHECK: vpshufb 291(%rax,%r14,8), %zmm26, %zmm22 +// CHECK: encoding: [0x62,0xa2,0x2d,0x40,0x00,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpshufb 291(%rax,%r14,8), %zmm26, %zmm22 + +// CHECK: vpshufb 8128(%rdx), %zmm26, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x72,0x7f] + vpshufb 8128(%rdx), %zmm26, %zmm22 + +// CHECK: vpshufb 8192(%rdx), %zmm26, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0x00,0x20,0x00,0x00] + vpshufb 8192(%rdx), %zmm26, %zmm22 + +// CHECK: vpshufb -8192(%rdx), %zmm26, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x72,0x80] + vpshufb -8192(%rdx), %zmm26, %zmm22 + +// CHECK: vpshufb -8256(%rdx), %zmm26, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0xc0,0xdf,0xff,0xff] + vpshufb -8256(%rdx), %zmm26, %zmm22 diff --git a/test/MC/X86/x86-64-avx512bw_vl.s b/test/MC/X86/x86-64-avx512bw_vl.s index 991c6102ebd..14a87df1ea8 100644 --- a/test/MC/X86/x86-64-avx512bw_vl.s +++ b/test/MC/X86/x86-64-avx512bw_vl.s @@ -6510,3 +6510,76 @@ // CHECK: vpavgw -4128(%rdx), %ymm23, %ymm21 // CHECK: 
encoding: [0x62,0xe1,0x45,0x20,0xe3,0xaa,0xe0,0xef,0xff,0xff] vpavgw -4128(%rdx), %ymm23, %ymm21 + +// CHECK: vpshufb %xmm27, %xmm24, %xmm23 +// CHECK: encoding: [0x62,0x82,0x3d,0x00,0x00,0xfb] + vpshufb %xmm27, %xmm24, %xmm23 + +// CHECK: vpshufb %xmm27, %xmm24, %xmm23 {%k4} +// CHECK: encoding: [0x62,0x82,0x3d,0x04,0x00,0xfb] + vpshufb %xmm27, %xmm24, %xmm23 {%k4} + +// CHECK: vpshufb %xmm27, %xmm24, %xmm23 {%k4} {z} +// CHECK: encoding: [0x62,0x82,0x3d,0x84,0x00,0xfb] + vpshufb %xmm27, %xmm24, %xmm23 {%k4} {z} + +// CHECK: vpshufb (%rcx), %xmm24, %xmm23 +// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x39] + vpshufb (%rcx), %xmm24, %xmm23 + +// CHECK: vpshufb 291(%rax,%r14,8), %xmm24, %xmm23 +// CHECK: encoding: [0x62,0xa2,0x3d,0x00,0x00,0xbc,0xf0,0x23,0x01,0x00,0x00] + vpshufb 291(%rax,%r14,8), %xmm24, %xmm23 + +// CHECK: vpshufb 2032(%rdx), %xmm24, %xmm23 +// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x7a,0x7f] + vpshufb 2032(%rdx), %xmm24, %xmm23 + +// CHECK: vpshufb 2048(%rdx), %xmm24, %xmm23 +// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0xba,0x00,0x08,0x00,0x00] + vpshufb 2048(%rdx), %xmm24, %xmm23 + +// CHECK: vpshufb -2048(%rdx), %xmm24, %xmm23 +// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x7a,0x80] + vpshufb -2048(%rdx), %xmm24, %xmm23 + +// CHECK: vpshufb -2064(%rdx), %xmm24, %xmm23 +// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0xba,0xf0,0xf7,0xff,0xff] + vpshufb -2064(%rdx), %xmm24, %xmm23 + +// CHECK: vpshufb %ymm17, %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xa2,0x6d,0x20,0x00,0xd9] + vpshufb %ymm17, %ymm18, %ymm19 + +// CHECK: vpshufb %ymm17, %ymm18, %ymm19 {%k4} +// CHECK: encoding: [0x62,0xa2,0x6d,0x24,0x00,0xd9] + vpshufb %ymm17, %ymm18, %ymm19 {%k4} + +// CHECK: vpshufb %ymm17, %ymm18, %ymm19 {%k4} {z} +// CHECK: encoding: [0x62,0xa2,0x6d,0xa4,0x00,0xd9] + vpshufb %ymm17, %ymm18, %ymm19 {%k4} {z} + +// CHECK: vpshufb (%rcx), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x19] + vpshufb (%rcx), %ymm18, %ymm19 + +// CHECK: 
vpshufb 291(%rax,%r14,8), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xa2,0x6d,0x20,0x00,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpshufb 291(%rax,%r14,8), %ymm18, %ymm19 + +// CHECK: vpshufb 4064(%rdx), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x5a,0x7f] + vpshufb 4064(%rdx), %ymm18, %ymm19 + +// CHECK: vpshufb 4096(%rdx), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0x00,0x10,0x00,0x00] + vpshufb 4096(%rdx), %ymm18, %ymm19 + +// CHECK: vpshufb -4096(%rdx), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x5a,0x80] + vpshufb -4096(%rdx), %ymm18, %ymm19 + +// CHECK: vpshufb -4128(%rdx), %ymm18, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0xe0,0xef,0xff,0xff] + vpshufb -4128(%rdx), %ymm18, %ymm19 +