// Masked 512-bit vcvtph2ps intrinsic, bound to __builtin_ia32_vcvtph2ps512_mask.
// Returns v16f32; the operands are v16i16, v16f32, i16 and i32. The tests that
// call it pass (source, pass-through, write mask, rounding/SAE control).
def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
// Masked 256-bit vcvtph2ps intrinsic, bound to __builtin_ia32_vcvtph2ps256_mask.
// Returns v8f32 from (v8i16, v8f32, i8); unlike the 512-bit form there is no
// trailing i32 operand.
+ def int_x86_avx512_mask_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256_mask">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty, llvm_v8f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
// Masked 128-bit vcvtph2ps intrinsic, bound to __builtin_ia32_vcvtph2ps128_mask.
// Returns v4f32 from (v8i16, v4f32, i8). The source is a full v8i16 even though
// the result has only four elements.
+ def int_x86_avx512_mask_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps128_mask">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty, llvm_v4f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
// Masked 512-bit vcvtps2ph intrinsic, bound to __builtin_ia32_vcvtps2ph512_mask.
// Returns v16i16 from (v16f32, i32, v16i16, i16).
def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty,
llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
// Removed version: rr/rm forms keyed on register classes, with empty ([])
// selection patterns.
-multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
- X86MemOperand x86memop> {
- def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}",
- []>, EVEX;
- let hasSideEffects = 0, mayLoad = 1 in
- def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
// New version: register (rr) and memory (rm) forms of vcvtph2ps (opcode 0x13)
// built on AVX512_maskable.
//   _dest / _src - vector type info for the destination and the source
//   x86memop     - memory operand used by the rm form
//   ld_frag      - load fragment matched (through bitconvert) by the rm form
// Both patterns pass FROUND_CURRENT as the second operand of X86cvtph2ps.
+multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
+ X86MemOperand x86memop, PatFrag ld_frag> {
+ defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
+ "vcvtph2ps", "$src", "$src",
+ (X86cvtph2ps (_src.VT _src.RC:$src),
+ (i32 FROUND_CURRENT))>, T8PD;
+ let hasSideEffects = 0, mayLoad = 1 in {
+ defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),
+ "vcvtph2ps", "$src", "$src",
+ (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))),
+ (i32 FROUND_CURRENT))>, T8PD;
+ }
+}
+
// {sae} register form (rb) of vcvtph2ps: opcode 0x13 on AVX512_maskable,
// matched when the second operand of X86cvtph2ps is FROUND_NO_EXC, and tagged
// EVEX_B. The assembly strings place "{sae}" next to $src.
// NOTE(review): the x86memop parameter is not referenced in this body.
+multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
+ X86MemOperand x86memop> {
+ defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
+ "vcvtph2ps", "{sae}, $src", "$src, {sae}",
+ (X86cvtph2ps (_src.VT _src.RC:$src),
+ (i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
+
+}
+
// Instantiations of vcvtph2ps. The 512-bit form combines avx512_cvtph2ps with
// avx512_cvtph2ps_sae under Predicates = [HasAVX512]; the 256- and 128-bit
// forms sit in a nested block that sets Predicates = [HasVLX] and get no
// {sae} variant. All three use EVEX_CD8<32, CD8VH>.
// NOTE(review): the 128-bit form pairs f64mem with loadv2i64 - confirm that
// the load fragment width is intended for a 64-bit memory operand.
+let Predicates = [HasAVX512] in {
+ defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64>,
+ avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, f256mem>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
+ let Predicates = [HasVLX] in {
+ defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
+ loadv2i64>,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
+ defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
+ loadv2i64>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
+ }
}
multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
}
-defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
- EVEX_CD8<32, CD8VH>;
// 512-bit vcvtps2ph instantiation: destRC = VR256X, srcRC = VR512, with
// f256mem as the third argument; encoded EVEX_V512 with EVEX_CD8<32, CD8VH>.
defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
(VCVTPS2PHZrr VR512:$src, imm:$rc)>;
-def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
- (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
- (VCVTPH2PSZrr VR256X:$src)>;
-
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS, EVEX, VEX_LIG,
// SelectionDAG node wrappers for X86ISD::FP_TO_SINT_RND and
// X86ISD::FP_TO_UINT_RND; both use the SDTDoubleToInt type profile.
def X86cvtpd2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToInt>;
def X86cvtpd2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToInt>;
// Node matched by the vcvtph2ps instruction patterns. It is declared against
// ISD::FP16_TO_FP with one result and two operands: the result is a
// floating-point vector with f32 elements, operand 1 is a vector with i16
// elements, and operand 2 is an integer (the patterns pass a rounding constant
// there).
+def X86cvtph2ps : SDNode<"ISD::FP16_TO_FP",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, i16>,
+ SDTCisFP<0>, SDTCisInt<2>]> >;
+
def X86vfpextRnd : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>,
X86ISD::VALIGN, 0),
X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_IMM8_MASK,
X86ISD::VALIGN, 0),
// Masked vcvtph2ps intrinsics (128/256/512-bit): all three rows use
// INTR_TYPE_1OP_MASK_RM and map to ISD::FP16_TO_FP.
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP16_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP16_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP16_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
; All-ones mask with rounding operand 4: expects the plain register form and
; its exact encoding. The label check fences matching to this function.
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_512:
; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
+
; All-ones mask with rounding operand 8: expects the {sae} register form.
; The label check fences matching to this function.
+define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
+ ; CHECK: vcvtph2ps {sae}, %ymm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
+ ret <16 x float> %res
+}
+
; Variable mask with pass-through %a1: expects the merge-masked form, writing
; the pass-through register under {%k1}. The label check fences matching to
; this function.
+define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk:
+ ; CHECK: vcvtph2ps %ymm0, %zmm1 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
; Variable mask, zero pass-through and rounding operand 8: expects the
; zero-masked {sae} form. The label check fences matching to this function.
+define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
+ ; CHECK: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
; Variable mask with zero pass-through and rounding operand 4: expects the
; zero-masked register form. The label check fences matching to this function.
+define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz:
+ ; CHECK: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
; The intrinsic is defined with IntrNoMem, so it is declared readnone here.
declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readnone
ret <2 x i64> %res4
}
; All-ones mask: expects the unmasked xmm-to-xmm form. The label check fences
; matching to this function.
+define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_128:
+ ; CHECK: vcvtph2ps %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
; Variable mask with pass-through %a1: expects the merge-masked form, writing
; the pass-through register under {%k1}. The label check fences matching to
; this function.
+define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_128_rrk:
+ ; CHECK: vcvtph2ps %xmm0, %xmm1 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask)
+ ret <4 x float> %res
+}
+
+
; Variable mask with zero pass-through: expects the zero-masked form. The
; label check fences matching to this function.
+define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_128_rrkz:
+ ; CHECK: vcvtph2ps %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
; The intrinsic is defined with IntrNoMem, so it is declared readnone here.
+declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readnone
+
; All-ones mask: expects the unmasked xmm-to-ymm form. The label check fences
; matching to this function.
+define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_256:
+ ; CHECK: vcvtph2ps %xmm0, %ymm0
+ %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
; Variable mask with pass-through %a1: expects the merge-masked form, writing
; the pass-through register under {%k1}. The label check fences matching to
; this function.
+define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_256_rrk:
+ ; CHECK: vcvtph2ps %xmm0, %ymm1 {%k1}
+ %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask)
+ ret <8 x float> %res
+}
+
; Variable mask with zero pass-through: expects the zero-masked form. The
; label check fences matching to this function.
+define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_256_rrkz:
+ ; CHECK: vcvtph2ps %xmm0, %ymm0 {%k1} {z}
+ %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
; The intrinsic is defined with IntrNoMem, so it is declared readnone here.
+declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readnone
// CHECK: encoding: [0x62,0xd2,0xfd,0x48,0x7c,0xc8]
vpbroadcastq %r8, %zmm1
// vcvtph2ps with a zmm destination: plain, merge-masked, zero-masked and {sae}
// register forms, followed by memory forms.
+// CHECK: vcvtph2ps %ymm27, %zmm13
+// CHECK: encoding: [0x62,0x12,0x7d,0x48,0x13,0xeb]
+ vcvtph2ps %ymm27, %zmm13
+
+// CHECK: vcvtph2ps %ymm27, %zmm13 {%k3}
+// CHECK: encoding: [0x62,0x12,0x7d,0x4b,0x13,0xeb]
+ vcvtph2ps %ymm27, %zmm13 {%k3}
+
+// CHECK: vcvtph2ps %ymm27, %zmm13 {%k3} {z}
+// CHECK: encoding: [0x62,0x12,0x7d,0xcb,0x13,0xeb]
+ vcvtph2ps %ymm27, %zmm13 {%k3} {z}
+
+// CHECK: vcvtph2ps {sae}, %ymm27, %zmm13
+// CHECK: encoding: [0x62,0x12,0x7d,0x18,0x13,0xeb]
+ vcvtph2ps {sae}, %ymm27, %zmm13
+
+// CHECK: vcvtph2ps (%rcx), %zmm13
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0x29]
+ vcvtph2ps (%rcx), %zmm13
+
+// CHECK: vcvtph2ps 291(%rax,%r14,8), %zmm13
+// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x13,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vcvtph2ps 291(%rax,%r14,8), %zmm13
+
// Displacement boundaries: 4064 and -4096 encode in a single byte (0x7f and
// 0x80), while 4096 and -4128 need a four-byte displacement.
+// CHECK: vcvtph2ps 4064(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0x6a,0x7f]
+ vcvtph2ps 4064(%rdx), %zmm13
+
+// CHECK: vcvtph2ps 4096(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0xaa,0x00,0x10,0x00,0x00]
+ vcvtph2ps 4096(%rdx), %zmm13
+
+// CHECK: vcvtph2ps -4096(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0x6a,0x80]
+ vcvtph2ps -4096(%rdx), %zmm13
+
+// CHECK: vcvtph2ps -4128(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0xaa,0xe0,0xef,0xff,0xff]
+ vcvtph2ps -4128(%rdx), %zmm13
// CHECK: encoding: [0x62,0xc2,0xfd,0x28,0x7c,0xd8]
vpbroadcastq %r8, %ymm19
// vcvtph2ps with an xmm destination: plain, merge-masked and zero-masked
// register forms, followed by memory forms.
+// CHECK: vcvtph2ps %xmm17, %xmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x13,0xd9]
+ vcvtph2ps %xmm17, %xmm27
+
+// CHECK: vcvtph2ps %xmm17, %xmm27 {%k2}
+// CHECK: encoding: [0x62,0x22,0x7d,0x0a,0x13,0xd9]
+ vcvtph2ps %xmm17, %xmm27 {%k2}
+
+// CHECK: vcvtph2ps %xmm17, %xmm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0x8a,0x13,0xd9]
+ vcvtph2ps %xmm17, %xmm27 {%k2} {z}
+
+// CHECK: vcvtph2ps (%rcx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x19]
+ vcvtph2ps (%rcx), %xmm27
+
+// CHECK: vcvtph2ps 291(%rax,%r14,8), %xmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x13,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vcvtph2ps 291(%rax,%r14,8), %xmm27
+
// Displacement boundaries: 1016 and -1024 encode in a single byte (0x7f and
// 0x80), while 1024 and -1032 need a four-byte displacement.
+// CHECK: vcvtph2ps 1016(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x5a,0x7f]
+ vcvtph2ps 1016(%rdx), %xmm27
+
+// CHECK: vcvtph2ps 1024(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x9a,0x00,0x04,0x00,0x00]
+ vcvtph2ps 1024(%rdx), %xmm27
+
+// CHECK: vcvtph2ps -1024(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x5a,0x80]
+ vcvtph2ps -1024(%rdx), %xmm27
+
+// CHECK: vcvtph2ps -1032(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x9a,0xf8,0xfb,0xff,0xff]
+ vcvtph2ps -1032(%rdx), %xmm27
+
// vcvtph2ps with a ymm destination: plain, merge-masked and zero-masked
// register forms, followed by memory forms.
+// CHECK: vcvtph2ps %xmm22, %ymm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x13,0xf6]
+ vcvtph2ps %xmm22, %ymm30
+
+// CHECK: vcvtph2ps %xmm22, %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x13,0xf6]
+ vcvtph2ps %xmm22, %ymm30 {%k7}
+
+// CHECK: vcvtph2ps %xmm22, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x13,0xf6]
+ vcvtph2ps %xmm22, %ymm30 {%k7} {z}
+
+// CHECK: vcvtph2ps (%rcx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0x31]
+ vcvtph2ps (%rcx), %ymm30
+
+// CHECK: vcvtph2ps 291(%rax,%r14,8), %ymm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x13,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vcvtph2ps 291(%rax,%r14,8), %ymm30
+
// Displacement boundaries: 2032 and -2048 encode in a single byte (0x7f and
// 0x80), while 2048 and -2064 need a four-byte displacement.
+// CHECK: vcvtph2ps 2032(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0x72,0x7f]
+ vcvtph2ps 2032(%rdx), %ymm30
+
+// CHECK: vcvtph2ps 2048(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0xb2,0x00,0x08,0x00,0x00]
+ vcvtph2ps 2048(%rdx), %ymm30
+
+// CHECK: vcvtph2ps -2048(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0x72,0x80]
+ vcvtph2ps -2048(%rdx), %ymm30
+
+// CHECK: vcvtph2ps -2064(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0xb2,0xf0,0xf7,0xff,0xff]
+ vcvtph2ps -2064(%rdx), %ymm30