1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s
; Masked 512-bit cvtpd2qq intrinsic test (<8 x double> -> <8 x i64>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 2, and once under an all-ones
; mask (i8 -1) with a final operand of i32 0. The two results are summed with
; an integer add.
; Expected assembly: kmovb loads the mask from %edi into %k1, the first call
; becomes the {%k1}-masked {ru-sae} form writing %zmm1, the second becomes the
; unmasked {rn-sae} form, and vpaddq combines them.
; NOTE(review): the final i32 operand presumably selects the rounding mode --
; confirm against the intrinsic definition.
3 declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
5 define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
6 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
8 ; CHECK-NEXT: kmovb %edi, %k1
9 ; CHECK-NEXT: vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1}
10 ; CHECK-NEXT: vcvtpd2qq {rn-sae}, %zmm0, %zmm0
11 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
13 %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
14 %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
15 %res2 = add <8 x i64> %res, %res1
; Masked 512-bit cvtpd2uqq intrinsic test (<8 x double> -> <8 x i64>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 2, and once under an all-ones
; mask (i8 -1) with a final operand of i32 0. The two results are summed with
; an integer add.
; Expected assembly: kmovb loads the mask from %edi into %k1, the first call
; becomes the {%k1}-masked {ru-sae} form writing %zmm1, the second becomes the
; unmasked {rn-sae} form, and vpaddq combines them.
; NOTE(review): the final i32 operand presumably selects the rounding mode --
; confirm against the intrinsic definition.
19 declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)
21 define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
22 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
24 ; CHECK-NEXT: kmovb %edi, %k1
25 ; CHECK-NEXT: vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1}
26 ; CHECK-NEXT: vcvtpd2uqq {rn-sae}, %zmm0, %zmm0
27 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
29 %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
30 %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
31 %res2 = add <8 x i64> %res, %res1
; Masked 512-bit cvtps2qq intrinsic test (<8 x float> -> <8 x i64>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 2, and once under an all-ones
; mask (i8 -1) with a final operand of i32 0. The two results are summed with
; an integer add.
; Expected assembly: kmovb loads the mask from %edi into %k1, the source is
; read from %ymm0, the first call becomes the {%k1}-masked {ru-sae} form
; writing %zmm1, the second becomes the unmasked {rn-sae} form, and vpaddq
; combines them.
; NOTE(review): the final i32 operand presumably selects the rounding mode --
; confirm against the intrinsic definition.
35 declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)
37 define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
38 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
40 ; CHECK-NEXT: kmovb %edi, %k1
41 ; CHECK-NEXT: vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1}
42 ; CHECK-NEXT: vcvtps2qq {rn-sae}, %ymm0, %zmm0
43 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
45 %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
46 %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
47 %res2 = add <8 x i64> %res, %res1
; Masked 512-bit cvtps2uqq intrinsic test (<8 x float> -> <8 x i64>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 2, and once under an all-ones
; mask (i8 -1) with a final operand of i32 0. The two results are summed with
; an integer add.
; Expected assembly: kmovb loads the mask from %edi into %k1, the source is
; read from %ymm0, the first call becomes the {%k1}-masked {ru-sae} form
; writing %zmm1, the second becomes the unmasked {rn-sae} form, and vpaddq
; combines them.
; NOTE(review): the final i32 operand presumably selects the rounding mode --
; confirm against the intrinsic definition.
51 declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)
53 define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
54 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
56 ; CHECK-NEXT: kmovb %edi, %k1
57 ; CHECK-NEXT: vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1}
58 ; CHECK-NEXT: vcvtps2uqq {rn-sae}, %ymm0, %zmm0
59 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
61 %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
62 %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
63 %res2 = add <8 x i64> %res, %res1
; Masked 512-bit cvtqq2pd intrinsic test (<8 x i64> -> <8 x double>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 4, and once under an all-ones
; mask (i8 -1) with a final operand of i32 0. The two results are combined
; with fadd and the sum is returned.
; Expected assembly: kmovb loads the mask from %edi into %k1, the first call
; becomes the {%k1}-masked form with no rounding annotation writing %zmm1, the
; second becomes the unmasked {rn-sae} form, and vaddpd combines them.
; NOTE(review): the final i32 operand presumably selects the rounding mode,
; with 4 meaning "no explicit override" -- confirm against the intrinsic
; definition.
67 declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)
69 define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
70 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
72 ; CHECK-NEXT: kmovb %edi, %k1
73 ; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm1 {%k1}
74 ; CHECK-NEXT: vcvtqq2pd {rn-sae}, %zmm0, %zmm0
75 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
77 %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
78 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
79 %res2 = fadd <8 x double> %res, %res1
80 ret <8 x double> %res2
; Masked 512-bit cvtqq2ps intrinsic test (<8 x i64> -> <8 x float>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 4, and once under an all-ones
; mask (i8 -1) with a final operand of i32 0. The two results are combined
; with fadd.
; Expected assembly: kmovb loads the mask from %edi into %k1, the first call
; becomes the {%k1}-masked form with no rounding annotation writing %ymm1, the
; second becomes the unmasked {rn-sae} form writing %ymm0, and vaddps combines
; them.
; NOTE(review): the final i32 operand presumably selects the rounding mode,
; with 4 meaning "no explicit override" -- confirm against the intrinsic
; definition.
83 declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)
85 define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
86 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
88 ; CHECK-NEXT: kmovb %edi, %k1
89 ; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm1 {%k1}
90 ; CHECK-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0
91 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
93 %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
94 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
95 %res2 = fadd <8 x float> %res, %res1
; Masked 512-bit cvttpd2qq intrinsic test (<8 x double> -> <8 x i64>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 4, and once under an all-ones
; mask (i8 -1) with a final operand of i32 8. The two results are summed with
; an integer add.
; Expected assembly: kmovb loads the mask from %edi into %k1, the first call
; becomes the {%k1}-masked form with no annotation writing %zmm1, the second
; becomes the unmasked {sae} form, and vpaddq combines them.
; NOTE(review): the final i32 operand presumably controls exception
; suppression (8 requesting {sae}) -- confirm against the intrinsic definition.
99 declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
101 define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
102 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
104 ; CHECK-NEXT: kmovb %edi, %k1
105 ; CHECK-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1}
106 ; CHECK-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0
107 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
109 %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
110 %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
111 %res2 = add <8 x i64> %res, %res1
; Masked 512-bit cvttpd2uqq intrinsic test (<8 x double> -> <8 x i64>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 4, and once under an all-ones
; mask (i8 -1) with a final operand of i32 8. The two results are summed with
; an integer add.
; Expected assembly: kmovb loads the mask from %edi into %k1, the first call
; becomes the {%k1}-masked form with no annotation writing %zmm1, the second
; becomes the unmasked {sae} form, and vpaddq combines them.
; NOTE(review): the final i32 operand presumably controls exception
; suppression (8 requesting {sae}) -- confirm against the intrinsic definition.
115 declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)
117 define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
118 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
120 ; CHECK-NEXT: kmovb %edi, %k1
121 ; CHECK-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1}
122 ; CHECK-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0
123 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
125 %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
126 %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
127 %res2 = add <8 x i64> %res, %res1
; Masked 512-bit cvttps2qq intrinsic test (<8 x float> -> <8 x i64>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 4, and once under an all-ones
; mask (i8 -1) with a final operand of i32 8. The two results are summed with
; an integer add.
; Expected assembly: kmovb loads the mask from %edi into %k1, the source is
; read from %ymm0, the first call becomes the {%k1}-masked form with no
; annotation writing %zmm1, the second becomes the unmasked {sae} form, and
; vpaddq combines them.
; NOTE(review): the final i32 operand presumably controls exception
; suppression (8 requesting {sae}) -- confirm against the intrinsic definition.
131 declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)
133 define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
134 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
136 ; CHECK-NEXT: kmovb %edi, %k1
137 ; CHECK-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1}
138 ; CHECK-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0
139 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
141 %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
142 %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
143 %res2 = add <8 x i64> %res, %res1
; Masked 512-bit cvttps2uqq intrinsic test (<8 x float> -> <8 x i64>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 4, and once under an all-ones
; mask (i8 -1) with a final operand of i32 8. The two results are summed with
; an integer add.
; Expected assembly: kmovb loads the mask from %edi into %k1, the source is
; read from %ymm0, the first call becomes the {%k1}-masked form with no
; annotation writing %zmm1, the second becomes the unmasked {sae} form, and
; vpaddq combines them.
; NOTE(review): the final i32 operand presumably controls exception
; suppression (8 requesting {sae}) -- confirm against the intrinsic definition.
147 declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)
149 define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
150 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
152 ; CHECK-NEXT: kmovb %edi, %k1
153 ; CHECK-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1}
154 ; CHECK-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0
155 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
157 %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
158 %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
159 %res2 = add <8 x i64> %res, %res1
; Masked 512-bit cvtuqq2pd intrinsic test (<8 x i64> -> <8 x double>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 4, and once under an all-ones
; mask (i8 -1) with a final operand of i32 0. The two results are combined
; with fadd and the sum is returned.
; Expected assembly: kmovb loads the mask from %edi into %k1, the first call
; becomes the {%k1}-masked form with no rounding annotation writing %zmm1, the
; second becomes the unmasked {rn-sae} form, and vaddpd combines them.
; NOTE(review): the final i32 operand presumably selects the rounding mode,
; with 4 meaning "no explicit override" -- confirm against the intrinsic
; definition.
163 declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)
165 define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
166 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
168 ; CHECK-NEXT: kmovb %edi, %k1
169 ; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm1 {%k1}
170 ; CHECK-NEXT: vcvtuqq2pd {rn-sae}, %zmm0, %zmm0
171 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
173 %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
174 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
175 %res2 = fadd <8 x double> %res, %res1
176 ret <8 x double> %res2
; Masked 512-bit cvtuqq2ps intrinsic test (<8 x i64> -> <8 x float>).
; The intrinsic is called twice on %x0 with %x1 as the second operand: once
; under mask %x2 with a final operand of i32 4, and once under an all-ones
; mask (i8 -1) with a final operand of i32 0. The two results are combined
; with fadd and the sum is returned.
; Expected assembly: kmovb loads the mask from %edi into %k1, the first call
; becomes the {%k1}-masked form with no rounding annotation writing %ymm1, the
; second becomes the unmasked {rn-sae} form writing %ymm0, and vaddps combines
; them.
; NOTE(review): the final i32 operand presumably selects the rounding mode,
; with 4 meaning "no explicit override" -- confirm against the intrinsic
; definition.
179 declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)
181 define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
182 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
184 ; CHECK-NEXT: kmovb %edi, %k1
185 ; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm1 {%k1}
186 ; CHECK-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0
187 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
189 %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
190 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
191 %res2 = fadd <8 x float> %res, %res1
192 ret <8 x float> %res2