Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
+
+ def int_x86_avx512_mask_pshuf_b_128 :
+ GCCBuiltin<"__builtin_ia32_pshufb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pshuf_b_256 :
+ GCCBuiltin<"__builtin_ia32_pshufb256_mask">,
+ Intrinsic<[llvm_v32i8_ty],
+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pshuf_b_512 :
+ GCCBuiltin<"__builtin_ia32_pshufb512_mask">,
+ Intrinsic<[llvm_v64i8_ty],
+ [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
+ [IntrNoMem]>;
}
// Vector blend
X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
+
+multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ let Predicates = [HasBWI] in
+ defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
+
+ let Predicates = [HasVLX, HasBWI] in {
+ defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256;
+ defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128;
+ }
+}
+
+defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
+
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
%res2 = add <32 x i16> %res, %res1
ret <32 x i16> %res2
}
+
+declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpshufb %zmm{{.*}}{%k1}
+define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+ %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
+ %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
+ %res2 = add <64 x i8> %res, %res1
+ ret <64 x i8> %res2
+}
\ No newline at end of file
%res2 = add <16 x i16> %res, %res1
ret <16 x i16> %res2
}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpshufb %xmm{{.*}}{%k1}
+define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
+ %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
+ %res2 = add <16 x i8> %res, %res1
+ ret <16 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpshufb %ymm{{.*}}{%k1}
+define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
+ %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
+ %res2 = add <32 x i8> %res, %res1
+ ret <32 x i8> %res2
+}
// CHECK: encoding: [0x62,0x61,0x15,0x40,0xe3,0xaa,0xc0,0xdf,0xff,0xff]
vpavgw -8256(%rdx), %zmm29, %zmm29
+// CHECK: vpshufb %zmm20, %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x2d,0x40,0x00,0xf4]
+ vpshufb %zmm20, %zmm26, %zmm22
+
+// CHECK: vpshufb %zmm20, %zmm26, %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x2d,0x47,0x00,0xf4]
+ vpshufb %zmm20, %zmm26, %zmm22 {%k7}
+
+// CHECK: vpshufb %zmm20, %zmm26, %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x2d,0xc7,0x00,0xf4]
+ vpshufb %zmm20, %zmm26, %zmm22 {%k7} {z}
+
+// CHECK: vpshufb (%rcx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x31]
+ vpshufb (%rcx), %zmm26, %zmm22
+
+// CHECK: vpshufb 291(%rax,%r14,8), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x2d,0x40,0x00,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpshufb 291(%rax,%r14,8), %zmm26, %zmm22
+
+// CHECK: vpshufb 8128(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x72,0x7f]
+ vpshufb 8128(%rdx), %zmm26, %zmm22
+
+// CHECK: vpshufb 8192(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0x00,0x20,0x00,0x00]
+ vpshufb 8192(%rdx), %zmm26, %zmm22
+
+// CHECK: vpshufb -8192(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0x72,0x80]
+ vpshufb -8192(%rdx), %zmm26, %zmm22
+
+// CHECK: vpshufb -8256(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x2d,0x40,0x00,0xb2,0xc0,0xdf,0xff,0xff]
+ vpshufb -8256(%rdx), %zmm26, %zmm22
// CHECK: vpavgw -4128(%rdx), %ymm23, %ymm21
// CHECK: encoding: [0x62,0xe1,0x45,0x20,0xe3,0xaa,0xe0,0xef,0xff,0xff]
vpavgw -4128(%rdx), %ymm23, %ymm21
+
+// CHECK: vpshufb %xmm27, %xmm24, %xmm23
+// CHECK: encoding: [0x62,0x82,0x3d,0x00,0x00,0xfb]
+ vpshufb %xmm27, %xmm24, %xmm23
+
+// CHECK: vpshufb %xmm27, %xmm24, %xmm23 {%k4}
+// CHECK: encoding: [0x62,0x82,0x3d,0x04,0x00,0xfb]
+ vpshufb %xmm27, %xmm24, %xmm23 {%k4}
+
+// CHECK: vpshufb %xmm27, %xmm24, %xmm23 {%k4} {z}
+// CHECK: encoding: [0x62,0x82,0x3d,0x84,0x00,0xfb]
+ vpshufb %xmm27, %xmm24, %xmm23 {%k4} {z}
+
+// CHECK: vpshufb (%rcx), %xmm24, %xmm23
+// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x39]
+ vpshufb (%rcx), %xmm24, %xmm23
+
+// CHECK: vpshufb 291(%rax,%r14,8), %xmm24, %xmm23
+// CHECK: encoding: [0x62,0xa2,0x3d,0x00,0x00,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vpshufb 291(%rax,%r14,8), %xmm24, %xmm23
+
+// CHECK: vpshufb 2032(%rdx), %xmm24, %xmm23
+// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x7a,0x7f]
+ vpshufb 2032(%rdx), %xmm24, %xmm23
+
+// CHECK: vpshufb 2048(%rdx), %xmm24, %xmm23
+// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0xba,0x00,0x08,0x00,0x00]
+ vpshufb 2048(%rdx), %xmm24, %xmm23
+
+// CHECK: vpshufb -2048(%rdx), %xmm24, %xmm23
+// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0x7a,0x80]
+ vpshufb -2048(%rdx), %xmm24, %xmm23
+
+// CHECK: vpshufb -2064(%rdx), %xmm24, %xmm23
+// CHECK: encoding: [0x62,0xe2,0x3d,0x00,0x00,0xba,0xf0,0xf7,0xff,0xff]
+ vpshufb -2064(%rdx), %xmm24, %xmm23
+
+// CHECK: vpshufb %ymm17, %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xa2,0x6d,0x20,0x00,0xd9]
+ vpshufb %ymm17, %ymm18, %ymm19
+
+// CHECK: vpshufb %ymm17, %ymm18, %ymm19 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x6d,0x24,0x00,0xd9]
+ vpshufb %ymm17, %ymm18, %ymm19 {%k4}
+
+// CHECK: vpshufb %ymm17, %ymm18, %ymm19 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x6d,0xa4,0x00,0xd9]
+ vpshufb %ymm17, %ymm18, %ymm19 {%k4} {z}
+
+// CHECK: vpshufb (%rcx), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x19]
+ vpshufb (%rcx), %ymm18, %ymm19
+
+// CHECK: vpshufb 291(%rax,%r14,8), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xa2,0x6d,0x20,0x00,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpshufb 291(%rax,%r14,8), %ymm18, %ymm19
+
+// CHECK: vpshufb 4064(%rdx), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x5a,0x7f]
+ vpshufb 4064(%rdx), %ymm18, %ymm19
+
+// CHECK: vpshufb 4096(%rdx), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0x00,0x10,0x00,0x00]
+ vpshufb 4096(%rdx), %ymm18, %ymm19
+
+// CHECK: vpshufb -4096(%rdx), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x5a,0x80]
+ vpshufb -4096(%rdx), %ymm18, %ymm19
+
+// CHECK: vpshufb -4128(%rdx), %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0xe0,0xef,0xff,0xff]
+ vpshufb -4128(%rdx), %ymm18, %ymm19
+