; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; Lowering tests for the AVX-512 k-register OR-test intrinsics.
declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
define i32 @test_kortestz(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kortestz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k0
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kortestw %k0, %k1
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    kmovw %eax, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
define i32 @test_kortestc(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kortestc:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k0
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kortestw %k0, %k1
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
  %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
  ret i32 %res
}
; Lowering tests for k-register logic intrinsics: kand, knot, kunpck.
declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
define i16 @test_kand(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kand:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movw $8, %ax
; CHECK-NEXT:    kmovw %eax, %k0
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kandw %k0, %k1, %k0
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    kandw %k1, %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
define i16 @test_knot(i16 %a0) {
; CHECK-LABEL: test_knot:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    knotw %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; CHECK-LABEL: unpckbw_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    kunpckbw %k1, %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}
; Reciprocal (rcp14) and round-scale lowering tests.
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrcp14ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_rcp_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrcp14pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone

declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double> @test7(<8 x double> %a) {
; CHECK-LABEL: test7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrndscalepd $11, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float> @test8(<16 x float> %a) {
; CHECK-LABEL: test8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrndscaleps $11, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
  ret <16 x float> %res
}
; rsqrt14 / rcp14 scalar forms and packed sqrt (incl. embedded rounding) tests.
define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rsqrt_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrsqrt14ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rsqrt14_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrsqrt14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rcp14_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vrcp14ss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_round_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
; getexp lowering tests, with and without {sae}.
define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_getexp_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vgetexppd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_getexp_round_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vgetexppd {sae}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_getexp_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vgetexpps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_getexp_round_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vgetexpps {sae}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
; Masked/zero-masked scalar sqrt with every rounding mode (4 = current, 1/2/3 = rd/ru/rz).
declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT:    vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm2, %xmm3, %xmm1
; CHECK-NEXT:    vaddps %xmm0, %xmm4, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
  %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)

  %res.1 = fadd <4 x float> %res0, %res1
  %res.2 = fadd <4 x float> %res2, %res3
  %res   = fadd <4 x float> %res.1, %res.2
  ret <4 x float> %res
}
; Double-precision counterpart of test_sqrt_ss.
declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_sqrt_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT:    vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm2, %xmm3, %xmm1
; CHECK-NEXT:    vaddpd %xmm0, %xmm4, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
  %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)

  %res.1 = fadd <2 x double> %res0, %res1
  %res.2 = fadd <2 x double> %res2, %res3
  %res   = fadd <2 x double> %res.1, %res.2
  ret <2 x double> %res
}
; Scalar double -> integer conversion tests (signed/unsigned, with {sae}).
define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsd2si %xmm0, %rax
; CHECK-NEXT:    retq
  %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsi642sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2si64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttsd2si %xmm0, %rcx
; CHECK-NEXT:    vcvttsd2si {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
  %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone

define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2usi:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttsd2usi %xmm0, %ecx
; CHECK-NEXT:    vcvttsd2usi {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
  %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
  %res2 = add i32 %res0, %res1
  ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone

define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttsd2si %xmm0, %ecx
; CHECK-NEXT:    vcvttsd2si {sae}, %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
  %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
  %res2 = add i32 %res0, %res1
  ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone

define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2usi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttsd2usi %xmm0, %rcx
; CHECK-NEXT:    vcvttsd2usi {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
  %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
; Scalar float -> integer conversion tests (signed/unsigned, with {sae}).
define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtss2si64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtss2si %xmm0, %rax
; CHECK-NEXT:    retq
  %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
; CHECK-LABEL: test_x86_sse_cvtsi642ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone

define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttss2si {sae}, %xmm0, %ecx
; CHECK-NEXT:    vcvttss2si %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
  %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
  %res2 = add i32 %res0, %res1
  ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone

define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2si64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttss2si %xmm0, %rcx
; CHECK-NEXT:    vcvttss2si {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
  %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone

define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2usi:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttss2usi {sae}, %xmm0, %ecx
; CHECK-NEXT:    vcvttss2usi %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
  %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
  %res2 = add i32 %res0, %res1
  ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone

define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2usi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttss2usi %xmm0, %rcx
; CHECK-NEXT:    vcvttss2usi {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
  %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone

define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsd2usi %xmm0, %rax
; CHECK-NEXT:    retq
  %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
; Half <-> single conversion tests: plain, {sae}, masked, and zero-masked forms.
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtph2ps %ymm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtph2ps {sae}, %ymm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvtph2ps %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vcvtph2ps %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly

define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
; CHECK-LABEL: test_x86_vcvtps2ph_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtps2ph $2, %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
; Broadcast tests: from memory and from xmm, with mask / zero-mask variants.
; NOTE(review): the ss_ps/sd_pd check blocks below are hand-written (plain
; "; CHECK:" after the label); their trailing checks may be incomplete —
; regenerate with update_llc_test_checks.py to tighten them.
define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
; CHECK-LABEL: test_x86_vbroadcast_ss_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
; CHECK-LABEL: test_x86_vbroadcast_sd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1, i16 %mask ) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_512:
; CHECK: kmovw %edi, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vbroadcastss %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> %a1, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 %mask)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}
declare <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float>, <16 x float>, i16) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1, i8 %mask ) {
; CHECK-LABEL: test_x86_vbroadcast_sd_pd_512:
; CHECK: kmovw %eax, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
  %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> %a1, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}
declare <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double>, <8 x double>, i8) nounwind readonly
; Integer broadcast tests: from xmm (masked/zero-masked/unmasked) and from GPR.
define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vpbroadcastd %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vpbroadcastd %xmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
; CHECK-LABEL: test_x86_pbroadcastd_i32_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly

define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vpbroadcastq %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vpbroadcastq %xmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
; CHECK-LABEL: test_x86_pbroadcastq_i64_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
; CDI instruction tests: vpconflict and vplzcnt, unmasked/masked/zero-masked.
define <16 x i32> @test_conflict_d(<16 x i32> %a) {
; CHECK-LABEL: test_conflict_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpconflictd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_conflict_q(<8 x i64> %a) {
; CHECK-LABEL: test_conflict_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpconflictq %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly

define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
; CHECK-LABEL: test_maskz_conflict_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpconflictd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_conflict_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpconflictq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret <8 x i64> %res
}

define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
; CHECK-LABEL: test_lzcnt_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vplzcntd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
; CHECK-LABEL: test_lzcnt_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vplzcntq %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly

define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_lzcnt_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vplzcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret <16 x i32> %res
}

define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_lzcnt_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vplzcntq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret <8 x i64> %res
}
; Masked blend and rounding-mode compare intrinsic tests.
define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_mask_blend_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly

define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_mask_blend_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}

define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
; CHECK-LABEL: test_x86_mask_blend_pd_512_memop:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vblendmpd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %b = load <8 x double>, <8 x double>* %ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly

define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
; CHECK-LABEL: test_x86_mask_blend_d_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; CHECK-LABEL: test_x86_mask_blend_q_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmq %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly

define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_cmpps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)

define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: test_cmppd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
; Packed max/min and masked absolute-value intrinsic tests.
define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
; CHECK-LABEL: test_vmaxpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double>zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
                    <8 x double>, i8, i32)

define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
; CHECK-LABEL: test_vminpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double>zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
                    <8 x double>, i8, i32)

declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpabsd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vpabsd %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpabsq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vpabsq %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
; vptestm and masked store (unaligned/aligned) intrinsic tests.
define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_vptestmq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vptestmq %zmm1, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_vptestmd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)

define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_store1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vmovups %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )

define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_store2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vmovupd %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)

define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_mask_store_aligned_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )

define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_store_aligned_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vmovapd %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
910 define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
911 ; CHECK-LABEL: test_mask_load_aligned_ps:
913 ; CHECK-NEXT: kmovw %esi, %k1
914 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
915 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1}
916 ; CHECK-NEXT: vmovaps (%rdi), %zmm1 {%k1} {z}
917 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
919 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
920 %res1 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
921 %res2 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
922 %res4 = fadd <16 x float> %res2, %res1
923 ret <16 x float> %res4
926 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
; Same three-mode masking pattern as test_mask_load_aligned_ps, but through the
; unaligned-load intrinsic llvm.x86.avx512.mask.loadu.ps.512, so the expected
; instruction is vmovups instead of vmovaps.
928 define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
929 ; CHECK-LABEL: test_mask_load_unaligned_ps:
931 ; CHECK-NEXT: kmovw %esi, %k1
932 ; CHECK-NEXT: vmovups (%rdi), %zmm0
933 ; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1}
934 ; CHECK-NEXT: vmovups (%rdi), %zmm1 {%k1} {z}
935 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
937 %res = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
938 %res1 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
939 %res2 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
940 %res4 = fadd <16 x float> %res2, %res1
941 ret <16 x float> %res4
944 declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8*, <16 x float>, i16)
; Double-precision counterpart of test_mask_load_aligned_ps, via
; llvm.x86.avx512.mask.load.pd.512 (expected vmovapd). Because the mask is i8
; here, the checks also pin the zero-extension of the mask argument
; (movzbl %sil, %eax) before it is moved into the k-register.
946 define <8 x double> @test_mask_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
947 ; CHECK-LABEL: test_mask_load_aligned_pd:
949 ; CHECK-NEXT: movzbl %sil, %eax
950 ; CHECK-NEXT: kmovw %eax, %k1
951 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
952 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1}
953 ; CHECK-NEXT: vmovapd (%rdi), %zmm1 {%k1} {z}
954 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
956 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
957 %res1 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
958 %res2 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
959 %res4 = fadd <8 x double> %res2, %res1
960 ret <8 x double> %res4
963 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
; Unaligned double-precision variant: llvm.x86.avx512.mask.loadu.pd.512 with
; unmasked, merge-masked, and zero-masked calls (expected vmovupd), again
; checking the i8-mask zero-extension before kmovw.
965 define <8 x double> @test_mask_load_unaligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
966 ; CHECK-LABEL: test_mask_load_unaligned_pd:
968 ; CHECK-NEXT: movzbl %sil, %eax
969 ; CHECK-NEXT: kmovw %eax, %k1
970 ; CHECK-NEXT: vmovupd (%rdi), %zmm0
971 ; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1}
972 ; CHECK-NEXT: vmovupd (%rdi), %zmm1 {%k1} {z}
973 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
975 %res = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
976 %res1 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
977 %res2 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
978 %res4 = fadd <8 x double> %res2, %res1
979 ret <8 x double> %res4
982 declare <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8*, <8 x double>, i8)
984 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
985 ; CHECK-LABEL: test_valign_q:
987 ; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm0
989 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
993 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
994 ; CHECK-LABEL: test_mask_valign_q:
996 ; CHECK-NEXT: movzbl %dil, %eax
997 ; CHECK-NEXT: kmovw %eax, %k1
998 ; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
999 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1001 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
1005 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
1007 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1008 ; CHECK-LABEL: test_maskz_valign_d:
1010 ; CHECK-NEXT: kmovw %edi, %k1
1011 ; CHECK-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z}
1013 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
1017 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
1019 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
1020 ; CHECK-LABEL: test_mask_store_ss:
1022 ; CHECK-NEXT: kmovw %esi, %k1
1023 ; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1}
1025 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
1029 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
1031 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
1032 ; CHECK-LABEL: test_pcmpeq_d:
1034 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1035 ; CHECK-NEXT: kmovw %k0, %eax
1037 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
1041 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1042 ; CHECK-LABEL: test_mask_pcmpeq_d:
1044 ; CHECK-NEXT: kmovw %edi, %k1
1045 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1046 ; CHECK-NEXT: kmovw %k0, %eax
1048 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
1052 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
1054 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
1055 ; CHECK-LABEL: test_pcmpeq_q:
1057 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
1058 ; CHECK-NEXT: kmovw %k0, %eax
1060 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
1064 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1065 ; CHECK-LABEL: test_mask_pcmpeq_q:
1067 ; CHECK-NEXT: movzbl %dil, %eax
1068 ; CHECK-NEXT: kmovw %eax, %k1
1069 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
1070 ; CHECK-NEXT: kmovw %k0, %eax
1072 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
1076 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
1078 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
1079 ; CHECK-LABEL: test_pcmpgt_d:
1081 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
1082 ; CHECK-NEXT: kmovw %k0, %eax
1084 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
1088 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1089 ; CHECK-LABEL: test_mask_pcmpgt_d:
1091 ; CHECK-NEXT: kmovw %edi, %k1
1092 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
1093 ; CHECK-NEXT: kmovw %k0, %eax
1095 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
1099 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
1101 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
1102 ; CHECK-LABEL: test_pcmpgt_q:
1104 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
1105 ; CHECK-NEXT: kmovw %k0, %eax
1107 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
1111 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1112 ; CHECK-LABEL: test_mask_pcmpgt_q:
1114 ; CHECK-NEXT: movzbl %dil, %eax
1115 ; CHECK-NEXT: kmovw %eax, %k1
1116 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
1117 ; CHECK-NEXT: kmovw %k0, %eax
1119 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
1123 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
1125 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
1126 ; CHECK-LABEL: test_cmp_d_512:
1128 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1129 ; CHECK-NEXT: kmovw %k0, %r8d
1130 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0
1131 ; CHECK-NEXT: kmovw %k0, %r9d
1132 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0
1133 ; CHECK-NEXT: kmovw %k0, %r10d
1134 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0
1135 ; CHECK-NEXT: kmovw %k0, %esi
1136 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
1137 ; CHECK-NEXT: kmovw %k0, %edi
1138 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
1139 ; CHECK-NEXT: kmovw %k0, %eax
1140 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0
1141 ; CHECK-NEXT: kmovw %k0, %ecx
1142 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0
1143 ; CHECK-NEXT: kmovw %k0, %edx
1144 ; CHECK-NEXT: vmovd %r8d, %xmm0
1145 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1146 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1147 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1148 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1149 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1150 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1151 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1153 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
1154 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1155 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
1156 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1157 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
1158 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1159 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
1160 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1161 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
1162 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1163 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
1164 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1165 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
1166 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1167 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
1168 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
1172 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1173 ; CHECK-LABEL: test_mask_cmp_d_512:
1175 ; CHECK-NEXT: kmovw %edi, %k1
1176 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1177 ; CHECK-NEXT: kmovw %k0, %r8d
1178 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0 {%k1}
1179 ; CHECK-NEXT: kmovw %k0, %r9d
1180 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0 {%k1}
1181 ; CHECK-NEXT: kmovw %k0, %r10d
1182 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0 {%k1}
1183 ; CHECK-NEXT: kmovw %k0, %esi
1184 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 {%k1}
1185 ; CHECK-NEXT: kmovw %k0, %edi
1186 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
1187 ; CHECK-NEXT: kmovw %k0, %eax
1188 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0 {%k1}
1189 ; CHECK-NEXT: kmovw %k0, %ecx
1190 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0 {%k1}
1191 ; CHECK-NEXT: kmovw %k0, %edx
1192 ; CHECK-NEXT: vmovd %r8d, %xmm0
1193 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1194 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1195 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1196 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1197 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1198 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1199 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1201 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
1202 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1203 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
1204 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1205 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
1206 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1207 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
1208 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1209 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
1210 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1211 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
1212 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1213 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
1214 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1215 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
1216 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
1220 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
1222 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
1223 ; CHECK-LABEL: test_ucmp_d_512:
1225 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0
1226 ; CHECK-NEXT: kmovw %k0, %r8d
1227 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0
1228 ; CHECK-NEXT: kmovw %k0, %r9d
1229 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0
1230 ; CHECK-NEXT: kmovw %k0, %r10d
1231 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0
1232 ; CHECK-NEXT: kmovw %k0, %esi
1233 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0
1234 ; CHECK-NEXT: kmovw %k0, %edi
1235 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0
1236 ; CHECK-NEXT: kmovw %k0, %eax
1237 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
1238 ; CHECK-NEXT: kmovw %k0, %ecx
1239 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0
1240 ; CHECK-NEXT: kmovw %k0, %edx
1241 ; CHECK-NEXT: vmovd %r8d, %xmm0
1242 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1243 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1244 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1245 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1246 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1247 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1248 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1250 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
1251 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1252 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
1253 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1254 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
1255 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1256 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
1257 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1258 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
1259 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1260 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
1261 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1262 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
1263 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1264 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
1265 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
1269 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1270 ; CHECK-LABEL: test_mask_ucmp_d_512:
1272 ; CHECK-NEXT: kmovw %edi, %k1
1273 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 {%k1}
1274 ; CHECK-NEXT: kmovw %k0, %r8d
1275 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
1276 ; CHECK-NEXT: kmovw %k0, %r9d
1277 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0 {%k1}
1278 ; CHECK-NEXT: kmovw %k0, %r10d
1279 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0 {%k1}
1280 ; CHECK-NEXT: kmovw %k0, %esi
1281 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0 {%k1}
1282 ; CHECK-NEXT: kmovw %k0, %edi
1283 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0 {%k1}
1284 ; CHECK-NEXT: kmovw %k0, %eax
1285 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 {%k1}
1286 ; CHECK-NEXT: kmovw %k0, %ecx
1287 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0 {%k1}
1288 ; CHECK-NEXT: kmovw %k0, %edx
1289 ; CHECK-NEXT: vmovd %r8d, %xmm0
1290 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1291 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1292 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1293 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1294 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1295 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1296 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1298 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
1299 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1300 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
1301 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1302 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
1303 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1304 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
1305 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1306 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
1307 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1308 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
1309 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1310 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
1311 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1312 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
1313 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
1317 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
1319 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
1320 ; CHECK-LABEL: test_cmp_q_512:
1322 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
1323 ; CHECK-NEXT: kmovw %k0, %r8d
1324 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0
1325 ; CHECK-NEXT: kmovw %k0, %r9d
1326 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0
1327 ; CHECK-NEXT: kmovw %k0, %r10d
1328 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0
1329 ; CHECK-NEXT: kmovw %k0, %r11d
1330 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0
1331 ; CHECK-NEXT: kmovw %k0, %edi
1332 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
1333 ; CHECK-NEXT: kmovw %k0, %eax
1334 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0
1335 ; CHECK-NEXT: kmovw %k0, %ecx
1336 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0
1337 ; CHECK-NEXT: kmovw %k0, %edx
1338 ; CHECK-NEXT: movzbl %r8b, %esi
1339 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1340 ; CHECK-NEXT: movzbl %r9b, %esi
1341 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1342 ; CHECK-NEXT: movzbl %r10b, %esi
1343 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1344 ; CHECK-NEXT: movzbl %r11b, %esi
1345 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1346 ; CHECK-NEXT: movzbl %dil, %esi
1347 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1348 ; CHECK-NEXT: movzbl %al, %eax
1349 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1350 ; CHECK-NEXT: movzbl %cl, %eax
1351 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1352 ; CHECK-NEXT: movzbl %dl, %eax
1353 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1355 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
1356 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1357 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
1358 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1359 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
1360 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1361 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
1362 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1363 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
1364 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1365 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
1366 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1367 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
1368 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1369 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
1370 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1374 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1375 ; CHECK-LABEL: test_mask_cmp_q_512:
1377 ; CHECK-NEXT: movzbl %dil, %eax
1378 ; CHECK-NEXT: kmovw %eax, %k1
1379 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
1380 ; CHECK-NEXT: kmovw %k0, %r8d
1381 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0 {%k1}
1382 ; CHECK-NEXT: kmovw %k0, %r9d
1383 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0 {%k1}
1384 ; CHECK-NEXT: kmovw %k0, %r10d
1385 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0 {%k1}
1386 ; CHECK-NEXT: kmovw %k0, %r11d
1387 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1}
1388 ; CHECK-NEXT: kmovw %k0, %edi
1389 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
1390 ; CHECK-NEXT: kmovw %k0, %eax
1391 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0 {%k1}
1392 ; CHECK-NEXT: kmovw %k0, %ecx
1393 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0 {%k1}
1394 ; CHECK-NEXT: kmovw %k0, %edx
1395 ; CHECK-NEXT: movzbl %r8b, %esi
1396 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1397 ; CHECK-NEXT: movzbl %r9b, %esi
1398 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1399 ; CHECK-NEXT: movzbl %r10b, %esi
1400 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1401 ; CHECK-NEXT: movzbl %r11b, %esi
1402 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1403 ; CHECK-NEXT: movzbl %dil, %esi
1404 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1405 ; CHECK-NEXT: movzbl %al, %eax
1406 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1407 ; CHECK-NEXT: movzbl %cl, %eax
1408 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1409 ; CHECK-NEXT: movzbl %dl, %eax
1410 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1412 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
1413 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1414 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
1415 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1416 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
1417 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1418 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
1419 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1420 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
1421 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1422 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1423 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1424 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1425 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1426 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1427 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1431 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
1433 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
1434 ; CHECK-LABEL: test_ucmp_q_512:
1436 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0
1437 ; CHECK-NEXT: kmovw %k0, %r8d
1438 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
1439 ; CHECK-NEXT: kmovw %k0, %r9d
1440 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0
1441 ; CHECK-NEXT: kmovw %k0, %r10d
1442 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0
1443 ; CHECK-NEXT: kmovw %k0, %r11d
1444 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0
1445 ; CHECK-NEXT: kmovw %k0, %edi
1446 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0
1447 ; CHECK-NEXT: kmovw %k0, %eax
1448 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0
1449 ; CHECK-NEXT: kmovw %k0, %ecx
1450 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0
1451 ; CHECK-NEXT: kmovw %k0, %edx
1452 ; CHECK-NEXT: movzbl %r8b, %esi
1453 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1454 ; CHECK-NEXT: movzbl %r9b, %esi
1455 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1456 ; CHECK-NEXT: movzbl %r10b, %esi
1457 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1458 ; CHECK-NEXT: movzbl %r11b, %esi
1459 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1460 ; CHECK-NEXT: movzbl %dil, %esi
1461 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1462 ; CHECK-NEXT: movzbl %al, %eax
1463 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1464 ; CHECK-NEXT: movzbl %cl, %eax
1465 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1466 ; CHECK-NEXT: movzbl %dl, %eax
1467 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1469 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
1470 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1471 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
1472 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1473 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
1474 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1475 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
1476 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1477 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
1478 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1479 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
1480 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1481 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
1482 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1483 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
1484 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1488 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1489 ; CHECK-LABEL: test_mask_ucmp_q_512:
1491 ; CHECK-NEXT: movzbl %dil, %eax
1492 ; CHECK-NEXT: kmovw %eax, %k1
1493 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 {%k1}
1494 ; CHECK-NEXT: kmovw %k0, %r8d
1495 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
1496 ; CHECK-NEXT: kmovw %k0, %r9d
1497 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 {%k1}
1498 ; CHECK-NEXT: kmovw %k0, %r10d
1499 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1}
1500 ; CHECK-NEXT: kmovw %k0, %r11d
1501 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0 {%k1}
1502 ; CHECK-NEXT: kmovw %k0, %edi
1503 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1}
1504 ; CHECK-NEXT: kmovw %k0, %eax
1505 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1}
1506 ; CHECK-NEXT: kmovw %k0, %ecx
1507 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0 {%k1}
1508 ; CHECK-NEXT: kmovw %k0, %edx
1509 ; CHECK-NEXT: movzbl %r8b, %esi
1510 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1511 ; CHECK-NEXT: movzbl %r9b, %esi
1512 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1513 ; CHECK-NEXT: movzbl %r10b, %esi
1514 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1515 ; CHECK-NEXT: movzbl %r11b, %esi
1516 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1517 ; CHECK-NEXT: movzbl %dil, %esi
1518 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1519 ; CHECK-NEXT: movzbl %al, %eax
1520 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1521 ; CHECK-NEXT: movzbl %cl, %eax
1522 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1523 ; CHECK-NEXT: movzbl %dl, %eax
1524 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1526 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
1527 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1528 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
1529 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1530 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
1531 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1532 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
1533 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1534 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
1535 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1536 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1537 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1538 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1539 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1540 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1541 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1545 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
; Merge-masked 128-bit lane extraction: llvm.x86.avx512.mask.vextractf32x4.512
; pulls lane index 2 out of the 512-bit %a, merging into passthru %b under
; %mask. Expected lowering is a single vextractf32x4 $2 with {%k1}.
1547 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
1548 ; CHECK-LABEL: test_mask_vextractf32x4:
1550 ; CHECK-NEXT: kmovw %edi, %k1
1551 ; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
1553 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
1554 ret <4 x float> %res
1557 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)
1559 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
1560 ; CHECK-LABEL: test_mask_vextracti64x4:
1562 ; CHECK-NEXT: kmovw %edi, %k1
1563 ; CHECK-NEXT: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
1565 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask)
1569 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)
1571 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
1572 ; CHECK-LABEL: test_maskz_vextracti32x4:
1574 ; CHECK-NEXT: kmovw %edi, %k1
1575 ; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
1577 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
1581 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)
1583 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
1584 ; CHECK-LABEL: test_vextractf64x4:
1586 ; CHECK-NEXT: vextractf64x4 $2, %zmm0, %ymm0
1588 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1)
1589 ret <4 x double> %res
1592 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
; ---------------------------------------------------------------------------
; Immediate-count 512-bit shift intrinsics: pslli/psrli/psrai in .d (i32x16,
; i16 mask) and .q (i64x8, i8 mask) flavors. Each flavor is tested three ways:
; unmasked (mask -1 folds to the plain instruction), merge-masked (%a1
; passthru, result copied out of the passthru register), and zero-masked {z}.
; NOTE(review): the .q variants go through movzbl before kmovw because the i8
; mask arrives in %dil — the zero-extension is part of the expected lowering
; at the time these CHECK lines were generated.
; CHECK lines are autogenerated by update_llc_test_checks.py — do not hand-edit.
; ---------------------------------------------------------------------------

; --- vpslld $imm (dword, logical left) ---
1594 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
1595 ; CHECK-LABEL: test_x86_avx512_pslli_d:
1597 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0
1599 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1603 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1604 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d:
1606 ; CHECK-NEXT: kmovw %edi, %k1
1607 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm1 {%k1}
1608 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1610 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1614 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
1615 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d:
1617 ; CHECK-NEXT: kmovw %edi, %k1
1618 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z}
1620 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1624 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

; --- vpsllq $imm (qword, logical left) ---
1626 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1627 ; CHECK-LABEL: test_x86_avx512_pslli_q:
1629 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0
1631 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1635 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1636 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q:
1638 ; CHECK-NEXT: movzbl %dil, %eax
1639 ; CHECK-NEXT: kmovw %eax, %k1
1640 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1}
1641 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1643 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1647 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1648 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q:
1650 ; CHECK-NEXT: movzbl %dil, %eax
1651 ; CHECK-NEXT: kmovw %eax, %k1
1652 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
1654 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1658 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

; --- vpsrld $imm (dword, logical right) ---
1660 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1661 ; CHECK-LABEL: test_x86_avx512_psrli_d:
1663 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0
1665 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1669 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1670 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d:
1672 ; CHECK-NEXT: kmovw %edi, %k1
1673 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1}
1674 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1676 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1680 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1681 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d:
1683 ; CHECK-NEXT: kmovw %edi, %k1
1684 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1686 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1690 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

; --- vpsrlq $imm (qword, logical right) ---
1692 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1693 ; CHECK-LABEL: test_x86_avx512_psrli_q:
1695 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0
1697 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1701 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1702 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q:
1704 ; CHECK-NEXT: movzbl %dil, %eax
1705 ; CHECK-NEXT: kmovw %eax, %k1
1706 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1}
1707 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1709 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1713 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1714 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q:
1716 ; CHECK-NEXT: movzbl %dil, %eax
1717 ; CHECK-NEXT: kmovw %eax, %k1
1718 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1720 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1724 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

; --- vpsrad $imm (dword, arithmetic right) ---
1726 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1727 ; CHECK-LABEL: test_x86_avx512_psrai_d:
1729 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0
1731 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1735 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1736 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d:
1738 ; CHECK-NEXT: kmovw %edi, %k1
1739 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1}
1740 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1742 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1746 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1747 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d:
1749 ; CHECK-NEXT: kmovw %edi, %k1
1750 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1752 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1756 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

; --- vpsraq $imm (qword, arithmetic right; AVX-512-only instruction) ---
1758 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1759 ; CHECK-LABEL: test_x86_avx512_psrai_q:
1761 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0
1763 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1767 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1768 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q:
1770 ; CHECK-NEXT: movzbl %dil, %eax
1771 ; CHECK-NEXT: kmovw %eax, %k1
1772 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1}
1773 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1775 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1779 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1780 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q:
1782 ; CHECK-NEXT: movzbl %dil, %eax
1783 ; CHECK-NEXT: kmovw %eax, %k1
1784 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1786 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1790 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
; ---------------------------------------------------------------------------
; Uniform-count 512-bit shift intrinsics where the shift amount comes from the
; low element of an XMM register: psll/psrl/psra in .d and .q flavors, each in
; unmasked, merge-masked (%a2 passthru in %zmm2), and zero-masked {z} forms.
; CHECK lines are autogenerated by update_llc_test_checks.py — do not hand-edit.
; ---------------------------------------------------------------------------

; --- vpslld %xmm (dword, logical left) ---
1792 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1793 ; CHECK-LABEL: test_x86_avx512_psll_d:
1795 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0
1797 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1801 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1802 ; CHECK-LABEL: test_x86_avx512_mask_psll_d:
1804 ; CHECK-NEXT: kmovw %edi, %k1
1805 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1806 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1808 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1812 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1813 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d:
1815 ; CHECK-NEXT: kmovw %edi, %k1
1816 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1818 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1822 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

; --- vpsllq %xmm (qword, logical left) ---
1824 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1825 ; CHECK-LABEL: test_x86_avx512_psll_q:
1827 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0
1829 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1833 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1834 ; CHECK-LABEL: test_x86_avx512_mask_psll_q:
1836 ; CHECK-NEXT: movzbl %dil, %eax
1837 ; CHECK-NEXT: kmovw %eax, %k1
1838 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1839 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1841 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1845 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1846 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q:
1848 ; CHECK-NEXT: movzbl %dil, %eax
1849 ; CHECK-NEXT: kmovw %eax, %k1
1850 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1852 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1856 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

; --- vpsrld %xmm (dword, logical right) ---
1858 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1859 ; CHECK-LABEL: test_x86_avx512_psrl_d:
1861 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0
1863 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1867 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1868 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d:
1870 ; CHECK-NEXT: kmovw %edi, %k1
1871 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1872 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1874 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1878 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1879 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d:
1881 ; CHECK-NEXT: kmovw %edi, %k1
1882 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1884 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1888 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

; --- vpsrlq %xmm (qword, logical right) ---
1890 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1891 ; CHECK-LABEL: test_x86_avx512_psrl_q:
1893 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
1895 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1899 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1900 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q:
1902 ; CHECK-NEXT: movzbl %dil, %eax
1903 ; CHECK-NEXT: kmovw %eax, %k1
1904 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1905 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1907 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1911 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1912 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q:
1914 ; CHECK-NEXT: movzbl %dil, %eax
1915 ; CHECK-NEXT: kmovw %eax, %k1
1916 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1918 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1922 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

; --- vpsrad %xmm (dword, arithmetic right) ---
1924 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1925 ; CHECK-LABEL: test_x86_avx512_psra_d:
1927 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0
1929 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1933 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1934 ; CHECK-LABEL: test_x86_avx512_mask_psra_d:
1936 ; CHECK-NEXT: kmovw %edi, %k1
1937 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1938 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1940 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1944 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1945 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d:
1947 ; CHECK-NEXT: kmovw %edi, %k1
1948 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1950 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1954 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

; --- vpsraq %xmm (qword, arithmetic right; AVX-512-only instruction) ---
1956 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1957 ; CHECK-LABEL: test_x86_avx512_psra_q:
1959 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0
1961 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1965 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1966 ; CHECK-LABEL: test_x86_avx512_mask_psra_q:
1968 ; CHECK-NEXT: movzbl %dil, %eax
1969 ; CHECK-NEXT: kmovw %eax, %k1
1970 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1971 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1973 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1977 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1978 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q:
1980 ; CHECK-NEXT: movzbl %dil, %eax
1981 ; CHECK-NEXT: kmovw %eax, %k1
1982 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1984 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1988 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
; ---------------------------------------------------------------------------
; Variable per-element 512-bit shift intrinsics (count vector in ZMM):
; psllv/psrav/psrlv in .d and .q flavors, each in unmasked, merge-masked, and
; zero-masked {z} forms, plus one folded-load case (psrlv.q with a memory
; operand).
; CHECK lines are autogenerated by update_llc_test_checks.py — do not hand-edit.
; ---------------------------------------------------------------------------

; --- vpsllvd (dword, per-element logical left) ---
1990 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1991 ; CHECK-LABEL: test_x86_avx512_psllv_d:
1993 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
1995 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1999 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
2000 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d:
2002 ; CHECK-NEXT: kmovw %edi, %k1
2003 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
2004 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2006 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
2010 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
2011 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d:
2013 ; CHECK-NEXT: kmovw %edi, %k1
2014 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
2016 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
2020 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

; --- vpsllvq (qword, per-element logical left) ---
2022 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
2023 ; CHECK-LABEL: test_x86_avx512_psllv_q:
2025 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
2027 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
2031 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
2032 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q:
2034 ; CHECK-NEXT: movzbl %dil, %eax
2035 ; CHECK-NEXT: kmovw %eax, %k1
2036 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
2037 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2039 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
2043 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
2044 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q:
2046 ; CHECK-NEXT: movzbl %dil, %eax
2047 ; CHECK-NEXT: kmovw %eax, %k1
2048 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
2050 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
2054 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

; --- vpsravd (dword, per-element arithmetic right) ---
2057 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
2058 ; CHECK-LABEL: test_x86_avx512_psrav_d:
2060 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0
2062 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
2066 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
2067 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d:
2069 ; CHECK-NEXT: kmovw %edi, %k1
2070 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
2071 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2073 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
2077 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
2078 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d:
2080 ; CHECK-NEXT: kmovw %edi, %k1
2081 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
2083 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
2087 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

; --- vpsravq (qword, per-element arithmetic right; AVX-512-only) ---
2089 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
2090 ; CHECK-LABEL: test_x86_avx512_psrav_q:
2092 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0
2094 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
2098 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
2099 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q:
2101 ; CHECK-NEXT: movzbl %dil, %eax
2102 ; CHECK-NEXT: kmovw %eax, %k1
2103 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
2104 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2106 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
2110 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
2111 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q:
2113 ; CHECK-NEXT: movzbl %dil, %eax
2114 ; CHECK-NEXT: kmovw %eax, %k1
2115 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
2117 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
2121 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

; --- vpsrlvd (dword, per-element logical right) ---
2123 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
2124 ; CHECK-LABEL: test_x86_avx512_psrlv_d:
2126 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
2128 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
2132 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
2133 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d:
2135 ; CHECK-NEXT: kmovw %edi, %k1
2136 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
2137 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2139 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
2143 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
2144 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d:
2146 ; CHECK-NEXT: kmovw %edi, %k1
2147 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
2149 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
2153 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

; --- vpsrlvq (qword, per-element logical right) ---
2155 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
2156 ; CHECK-LABEL: test_x86_avx512_psrlv_q:
2158 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
2160 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
2164 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
2165 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q:
2167 ; CHECK-NEXT: movzbl %dil, %eax
2168 ; CHECK-NEXT: kmovw %eax, %k1
2169 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
2170 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2172 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
2176 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
2177 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q:
2179 ; CHECK-NEXT: movzbl %dil, %eax
2180 ; CHECK-NEXT: kmovw %eax, %k1
2181 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
2183 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
2187 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

; Load folding: the count-vector load must fold into the vpsrlvq memory operand.
2189 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
2190 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop:
2192 ; CHECK-NEXT: vpsrlvq (%rdi), %zmm0, %zmm0
2194 %b = load <8 x i64>, <8 x i64>* %ptr
2195 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
; ---------------------------------------------------------------------------
; Embedded-rounding FP arithmetic. The trailing i32 of each intrinsic selects
; the static rounding mode encoded as the {rn,rd,ru,rz}-sae operand:
;   0 = round-to-nearest, 1 = round-down, 2 = round-up, 3 = round-toward-zero.
; This section covers the unmasked (mask -1) vsubps/vmulps forms.
; CHECK lines are autogenerated by update_llc_test_checks.py — do not hand-edit.
; ---------------------------------------------------------------------------
2199 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2200 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2201 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; vsubps with each of the four static rounding modes.
2203 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
2204 ; CHECK-LABEL: test_vsubps_rn:
2206 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
2208 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2209 <16 x float> zeroinitializer, i16 -1, i32 0)
2210 ret <16 x float> %res
2213 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
2214 ; CHECK-LABEL: test_vsubps_rd:
2216 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
2218 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2219 <16 x float> zeroinitializer, i16 -1, i32 1)
2220 ret <16 x float> %res
2223 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
2224 ; CHECK-LABEL: test_vsubps_ru:
2226 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
2228 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2229 <16 x float> zeroinitializer, i16 -1, i32 2)
2230 ret <16 x float> %res
2233 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
2234 ; CHECK-LABEL: test_vsubps_rz:
2236 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
2238 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2239 <16 x float> zeroinitializer, i16 -1, i32 3)
2240 ret <16 x float> %res

; vmulps with each of the four static rounding modes.
2243 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
2244 ; CHECK-LABEL: test_vmulps_rn:
2246 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0
2248 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2249 <16 x float> zeroinitializer, i16 -1, i32 0)
2250 ret <16 x float> %res
2253 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
2254 ; CHECK-LABEL: test_vmulps_rd:
2256 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0
2258 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2259 <16 x float> zeroinitializer, i16 -1, i32 1)
2260 ret <16 x float> %res
2263 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
2264 ; CHECK-LABEL: test_vmulps_ru:
2266 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
2268 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2269 <16 x float> zeroinitializer, i16 -1, i32 2)
2270 ret <16 x float> %res
2273 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
2274 ; CHECK-LABEL: test_vmulps_rz:
2276 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
2278 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2279 <16 x float> zeroinitializer, i16 -1, i32 3)
2280 ret <16 x float> %res
; ---------------------------------------------------------------------------
; Embedded-rounding vmulps/vmulpd with a live mask, for every rounding mode
; (0=rn, 1=rd, 2=ru, 3=rz): zero-masked ps (zeroinitializer passthru -> {z}),
; merge-masked ps with an explicit %passthru operand, and zero-masked pd
; (i8 mask, hence the movzbl+kmovw sequence in the expected code).
; CHECK lines are autogenerated by update_llc_test_checks.py — do not hand-edit.
; ---------------------------------------------------------------------------

; Zero-masked vmulps, all four rounding modes.
2284 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2285 ; CHECK-LABEL: test_vmulps_mask_rn:
2287 ; CHECK-NEXT: kmovw %edi, %k1
2288 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2290 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2291 <16 x float> zeroinitializer, i16 %mask, i32 0)
2292 ret <16 x float> %res
2295 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2296 ; CHECK-LABEL: test_vmulps_mask_rd:
2298 ; CHECK-NEXT: kmovw %edi, %k1
2299 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2301 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2302 <16 x float> zeroinitializer, i16 %mask, i32 1)
2303 ret <16 x float> %res
2306 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2307 ; CHECK-LABEL: test_vmulps_mask_ru:
2309 ; CHECK-NEXT: kmovw %edi, %k1
2310 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2312 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2313 <16 x float> zeroinitializer, i16 %mask, i32 2)
2314 ret <16 x float> %res
2317 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2318 ; CHECK-LABEL: test_vmulps_mask_rz:
2320 ; CHECK-NEXT: kmovw %edi, %k1
2321 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2323 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2324 <16 x float> zeroinitializer, i16 %mask, i32 3)
2325 ret <16 x float> %res

2328 ;; With Passthru value
; Merge-masked vmulps: the passthru register (%zmm2) is the write-masked
; destination and is then copied to the return register.
2329 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2330 ; CHECK-LABEL: test_vmulps_mask_passthru_rn:
2332 ; CHECK-NEXT: kmovw %edi, %k1
2333 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2334 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2336 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2337 <16 x float> %passthru, i16 %mask, i32 0)
2338 ret <16 x float> %res
2341 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2342 ; CHECK-LABEL: test_vmulps_mask_passthru_rd:
2344 ; CHECK-NEXT: kmovw %edi, %k1
2345 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2346 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2348 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2349 <16 x float> %passthru, i16 %mask, i32 1)
2350 ret <16 x float> %res
2353 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2354 ; CHECK-LABEL: test_vmulps_mask_passthru_ru:
2356 ; CHECK-NEXT: kmovw %edi, %k1
2357 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2358 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2360 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2361 <16 x float> %passthru, i16 %mask, i32 2)
2362 ret <16 x float> %res
2365 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2366 ; CHECK-LABEL: test_vmulps_mask_passthru_rz:
2368 ; CHECK-NEXT: kmovw %edi, %k1
2369 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2370 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2372 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2373 <16 x float> %passthru, i16 %mask, i32 3)
2374 ret <16 x float> %res

; Zero-masked vmulpd (i8 mask arrives in %dil, hence movzbl before kmovw).
2378 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2379 ; CHECK-LABEL: test_vmulpd_mask_rn:
2381 ; CHECK-NEXT: movzbl %dil, %eax
2382 ; CHECK-NEXT: kmovw %eax, %k1
2383 ; CHECK-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2385 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2386 <8 x double> zeroinitializer, i8 %mask, i32 0)
2387 ret <8 x double> %res
2390 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2391 ; CHECK-LABEL: test_vmulpd_mask_rd:
2393 ; CHECK-NEXT: movzbl %dil, %eax
2394 ; CHECK-NEXT: kmovw %eax, %k1
2395 ; CHECK-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2397 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2398 <8 x double> zeroinitializer, i8 %mask, i32 1)
2399 ret <8 x double> %res
2402 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2403 ; CHECK-LABEL: test_vmulpd_mask_ru:
2405 ; CHECK-NEXT: movzbl %dil, %eax
2406 ; CHECK-NEXT: kmovw %eax, %k1
2407 ; CHECK-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2409 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2410 <8 x double> zeroinitializer, i8 %mask, i32 2)
2411 ret <8 x double> %res
2414 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2415 ; CHECK-LABEL: test_vmulpd_mask_rz:
2417 ; CHECK-NEXT: movzbl %dil, %eax
2418 ; CHECK-NEXT: kmovw %eax, %k1
2419 ; CHECK-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2421 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2422 <8 x double> zeroinitializer, i8 %mask, i32 3)
2423 ret <8 x double> %res
; ---------------------------------------------------------------------------
; 512-bit dword bitwise-logic intrinsics (pxor.d / por.d / pand.d): unmasked
; form must select the plain vpxord/vpord/vpandd, merge-masked form writes
; into the passthru register under {%k1} and copies it to the return register.
; CHECK lines are autogenerated by update_llc_test_checks.py — do not hand-edit.
; ---------------------------------------------------------------------------

; --- vpxord ---
2426 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
2427 ; CHECK-LABEL: test_xor_epi32:
2429 ; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0
2431 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
2432 ret < 16 x i32> %res
2435 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2436 ; CHECK-LABEL: test_mask_xor_epi32:
2438 ; CHECK-NEXT: kmovw %edi, %k1
2439 ; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1}
2440 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2442 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2443 ret < 16 x i32> %res
2446 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; --- vpord ---
2448 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
2449 ; CHECK-LABEL: test_or_epi32:
2451 ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
2453 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
2454 ret < 16 x i32> %res
2457 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2458 ; CHECK-LABEL: test_mask_or_epi32:
2460 ; CHECK-NEXT: kmovw %edi, %k1
2461 ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1}
2462 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2464 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2465 ret < 16 x i32> %res
2468 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; --- vpandd ---
2470 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
2471 ; CHECK-LABEL: test_and_epi32:
2473 ; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0
2475 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
2476 ret < 16 x i32> %res
2479 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2480 ; CHECK-LABEL: test_mask_and_epi32:
2482 ; CHECK-NEXT: kmovw %edi, %k1
2483 ; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1}
2484 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2486 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2487 ret < 16 x i32> %res
2490 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2492 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
2493 ; CHECK-LABEL: test_xor_epi64:
2495 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
2497 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
2501 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2502 ; CHECK-LABEL: test_mask_xor_epi64:
2504 ; CHECK-NEXT: movzbl %dil, %eax
2505 ; CHECK-NEXT: kmovw %eax, %k1
2506 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1}
2507 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2509 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2513 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2515 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
2516 ; CHECK-LABEL: test_or_epi64:
2518 ; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
2520 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
2524 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2525 ; CHECK-LABEL: test_mask_or_epi64:
2527 ; CHECK-NEXT: movzbl %dil, %eax
2528 ; CHECK-NEXT: kmovw %eax, %k1
2529 ; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1}
2530 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2532 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2536 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2538 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
2539 ; CHECK-LABEL: test_and_epi64:
2541 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
2543 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
2547 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2548 ; CHECK-LABEL: test_mask_and_epi64:
2550 ; CHECK-NEXT: movzbl %dil, %eax
2551 ; CHECK-NEXT: kmovw %eax, %k1
2552 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1}
2553 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2555 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2559 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2562 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2563 ; CHECK-LABEL: test_mask_add_epi32_rr:
2565 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
2567 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2568 ret < 16 x i32> %res
2571 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2572 ; CHECK-LABEL: test_mask_add_epi32_rrk:
2574 ; CHECK-NEXT: kmovw %edi, %k1
2575 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1}
2576 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2578 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2579 ret < 16 x i32> %res
2582 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2583 ; CHECK-LABEL: test_mask_add_epi32_rrkz:
2585 ; CHECK-NEXT: kmovw %edi, %k1
2586 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
2588 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2589 ret < 16 x i32> %res
2592 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2593 ; CHECK-LABEL: test_mask_add_epi32_rm:
2595 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
2597 %b = load <16 x i32>, <16 x i32>* %ptr_b
2598 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2599 ret < 16 x i32> %res
2602 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2603 ; CHECK-LABEL: test_mask_add_epi32_rmk:
2605 ; CHECK-NEXT: kmovw %esi, %k1
2606 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1}
2607 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2609 %b = load <16 x i32>, <16 x i32>* %ptr_b
2610 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2611 ret < 16 x i32> %res
2614 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2615 ; CHECK-LABEL: test_mask_add_epi32_rmkz:
2617 ; CHECK-NEXT: kmovw %esi, %k1
2618 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
2620 %b = load <16 x i32>, <16 x i32>* %ptr_b
2621 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2622 ret < 16 x i32> %res
2625 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
2626 ; CHECK-LABEL: test_mask_add_epi32_rmb:
2628 ; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0
2630 %q = load i32, i32* %ptr_b
2631 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2632 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2633 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2634 ret < 16 x i32> %res
2637 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2638 ; CHECK-LABEL: test_mask_add_epi32_rmbk:
2640 ; CHECK-NEXT: kmovw %esi, %k1
2641 ; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
2642 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2644 %q = load i32, i32* %ptr_b
2645 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2646 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2647 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2648 ret < 16 x i32> %res
2651 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2652 ; CHECK-LABEL: test_mask_add_epi32_rmbkz:
2654 ; CHECK-NEXT: kmovw %esi, %k1
2655 ; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
2657 %q = load i32, i32* %ptr_b
2658 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2659 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2660 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2661 ret < 16 x i32> %res
2664 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2666 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2667 ; CHECK-LABEL: test_mask_sub_epi32_rr:
2669 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
2671 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2672 ret < 16 x i32> %res
2675 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2676 ; CHECK-LABEL: test_mask_sub_epi32_rrk:
2678 ; CHECK-NEXT: kmovw %edi, %k1
2679 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
2680 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2682 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2683 ret < 16 x i32> %res
2686 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2687 ; CHECK-LABEL: test_mask_sub_epi32_rrkz:
2689 ; CHECK-NEXT: kmovw %edi, %k1
2690 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
2692 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2693 ret < 16 x i32> %res
2696 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2697 ; CHECK-LABEL: test_mask_sub_epi32_rm:
2699 ; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0
2701 %b = load <16 x i32>, <16 x i32>* %ptr_b
2702 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2703 ret < 16 x i32> %res
2706 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2707 ; CHECK-LABEL: test_mask_sub_epi32_rmk:
2709 ; CHECK-NEXT: kmovw %esi, %k1
2710 ; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1}
2711 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2713 %b = load <16 x i32>, <16 x i32>* %ptr_b
2714 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2715 ret < 16 x i32> %res
2718 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2719 ; CHECK-LABEL: test_mask_sub_epi32_rmkz:
2721 ; CHECK-NEXT: kmovw %esi, %k1
2722 ; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z}
2724 %b = load <16 x i32>, <16 x i32>* %ptr_b
2725 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2726 ret < 16 x i32> %res
2729 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
2730 ; CHECK-LABEL: test_mask_sub_epi32_rmb:
2732 ; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0
2734 %q = load i32, i32* %ptr_b
2735 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2736 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2737 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2738 ret < 16 x i32> %res
2741 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2742 ; CHECK-LABEL: test_mask_sub_epi32_rmbk:
2744 ; CHECK-NEXT: kmovw %esi, %k1
2745 ; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
2746 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2748 %q = load i32, i32* %ptr_b
2749 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2750 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2751 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2752 ret < 16 x i32> %res
2755 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2756 ; CHECK-LABEL: test_mask_sub_epi32_rmbkz:
2758 ; CHECK-NEXT: kmovw %esi, %k1
2759 ; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
2761 %q = load i32, i32* %ptr_b
2762 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2763 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2764 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2765 ret < 16 x i32> %res
2768 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2770 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
2771 ; CHECK-LABEL: test_mask_add_epi64_rr:
2773 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
2775 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2779 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2780 ; CHECK-LABEL: test_mask_add_epi64_rrk:
2782 ; CHECK-NEXT: movzbl %dil, %eax
2783 ; CHECK-NEXT: kmovw %eax, %k1
2784 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1}
2785 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2787 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2791 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2792 ; CHECK-LABEL: test_mask_add_epi64_rrkz:
2794 ; CHECK-NEXT: movzbl %dil, %eax
2795 ; CHECK-NEXT: kmovw %eax, %k1
2796 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z}
2798 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2802 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
2803 ; CHECK-LABEL: test_mask_add_epi64_rm:
2805 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
2807 %b = load <8 x i64>, <8 x i64>* %ptr_b
2808 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2812 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2813 ; CHECK-LABEL: test_mask_add_epi64_rmk:
2815 ; CHECK-NEXT: movzbl %sil, %eax
2816 ; CHECK-NEXT: kmovw %eax, %k1
2817 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1}
2818 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2820 %b = load <8 x i64>, <8 x i64>* %ptr_b
2821 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2825 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2826 ; CHECK-LABEL: test_mask_add_epi64_rmkz:
2828 ; CHECK-NEXT: movzbl %sil, %eax
2829 ; CHECK-NEXT: kmovw %eax, %k1
2830 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z}
2832 %b = load <8 x i64>, <8 x i64>* %ptr_b
2833 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2837 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2838 ; CHECK-LABEL: test_mask_add_epi64_rmb:
2840 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
2842 %q = load i64, i64* %ptr_b
2843 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2844 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2845 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2849 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2850 ; CHECK-LABEL: test_mask_add_epi64_rmbk:
2852 ; CHECK-NEXT: movzbl %sil, %eax
2853 ; CHECK-NEXT: kmovw %eax, %k1
2854 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
2855 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2857 %q = load i64, i64* %ptr_b
2858 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2859 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2860 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2864 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2865 ; CHECK-LABEL: test_mask_add_epi64_rmbkz:
2867 ; CHECK-NEXT: movzbl %sil, %eax
2868 ; CHECK-NEXT: kmovw %eax, %k1
2869 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
2871 %q = load i64, i64* %ptr_b
2872 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2873 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2874 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2878 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2880 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
2881 ; CHECK-LABEL: test_mask_sub_epi64_rr:
2883 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
2885 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2889 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2890 ; CHECK-LABEL: test_mask_sub_epi64_rrk:
2892 ; CHECK-NEXT: movzbl %dil, %eax
2893 ; CHECK-NEXT: kmovw %eax, %k1
2894 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1}
2895 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2897 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2901 define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2902 ; CHECK-LABEL: test_mask_sub_epi64_rrkz:
2904 ; CHECK-NEXT: movzbl %dil, %eax
2905 ; CHECK-NEXT: kmovw %eax, %k1
2906 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
2908 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2912 define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
2913 ; CHECK-LABEL: test_mask_sub_epi64_rm:
2915 ; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0
2917 %b = load <8 x i64>, <8 x i64>* %ptr_b
2918 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2922 define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2923 ; CHECK-LABEL: test_mask_sub_epi64_rmk:
2925 ; CHECK-NEXT: movzbl %sil, %eax
2926 ; CHECK-NEXT: kmovw %eax, %k1
2927 ; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1}
2928 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2930 %b = load <8 x i64>, <8 x i64>* %ptr_b
2931 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2935 define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2936 ; CHECK-LABEL: test_mask_sub_epi64_rmkz:
2938 ; CHECK-NEXT: movzbl %sil, %eax
2939 ; CHECK-NEXT: kmovw %eax, %k1
2940 ; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z}
2942 %b = load <8 x i64>, <8 x i64>* %ptr_b
2943 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2947 define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2948 ; CHECK-LABEL: test_mask_sub_epi64_rmb:
2950 ; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0
2952 %q = load i64, i64* %ptr_b
2953 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2954 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2955 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2959 define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2960 ; CHECK-LABEL: test_mask_sub_epi64_rmbk:
2962 ; CHECK-NEXT: movzbl %sil, %eax
2963 ; CHECK-NEXT: kmovw %eax, %k1
2964 ; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
2965 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2967 %q = load i64, i64* %ptr_b
2968 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2969 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2970 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2974 define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2975 ; CHECK-LABEL: test_mask_sub_epi64_rmbkz:
2977 ; CHECK-NEXT: movzbl %sil, %eax
2978 ; CHECK-NEXT: kmovw %eax, %k1
2979 ; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
2981 %q = load i64, i64* %ptr_b
2982 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2983 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2984 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2988 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2990 define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2991 ; CHECK-LABEL: test_mask_mul_epi32_rr:
2993 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0
2995 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2999 define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
3000 ; CHECK-LABEL: test_mask_mul_epi32_rrk:
3002 ; CHECK-NEXT: movzbl %dil, %eax
3003 ; CHECK-NEXT: kmovw %eax, %k1
3004 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1}
3005 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3007 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3011 define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
3012 ; CHECK-LABEL: test_mask_mul_epi32_rrkz:
3014 ; CHECK-NEXT: movzbl %dil, %eax
3015 ; CHECK-NEXT: kmovw %eax, %k1
3016 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z}
3018 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3022 define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
3023 ; CHECK-LABEL: test_mask_mul_epi32_rm:
3025 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0
3027 %b = load <16 x i32>, <16 x i32>* %ptr_b
3028 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3032 define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
3033 ; CHECK-LABEL: test_mask_mul_epi32_rmk:
3035 ; CHECK-NEXT: movzbl %sil, %eax
3036 ; CHECK-NEXT: kmovw %eax, %k1
3037 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1}
3038 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3040 %b = load <16 x i32>, <16 x i32>* %ptr_b
3041 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3045 define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
3046 ; CHECK-LABEL: test_mask_mul_epi32_rmkz:
3048 ; CHECK-NEXT: movzbl %sil, %eax
3049 ; CHECK-NEXT: kmovw %eax, %k1
3050 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z}
3052 %b = load <16 x i32>, <16 x i32>* %ptr_b
3053 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3057 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
3058 ; CHECK-LABEL: test_mask_mul_epi32_rmb:
3060 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0
3062 %q = load i64, i64* %ptr_b
3063 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3064 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3065 %b = bitcast <8 x i64> %b64 to <16 x i32>
3066 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3070 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
3071 ; CHECK-LABEL: test_mask_mul_epi32_rmbk:
3073 ; CHECK-NEXT: movzbl %sil, %eax
3074 ; CHECK-NEXT: kmovw %eax, %k1
3075 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
3076 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3078 %q = load i64, i64* %ptr_b
3079 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3080 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3081 %b = bitcast <8 x i64> %b64 to <16 x i32>
3082 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3086 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
3087 ; CHECK-LABEL: test_mask_mul_epi32_rmbkz:
3089 ; CHECK-NEXT: movzbl %sil, %eax
3090 ; CHECK-NEXT: kmovw %eax, %k1
3091 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
3093 %q = load i64, i64* %ptr_b
3094 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3095 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3096 %b = bitcast <8 x i64> %b64 to <16 x i32>
3097 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3101 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
3103 define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
3104 ; CHECK-LABEL: test_mask_mul_epu32_rr:
3106 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
3108 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3112 define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
3113 ; CHECK-LABEL: test_mask_mul_epu32_rrk:
3115 ; CHECK-NEXT: movzbl %dil, %eax
3116 ; CHECK-NEXT: kmovw %eax, %k1
3117 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1}
3118 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3120 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3124 define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
3125 ; CHECK-LABEL: test_mask_mul_epu32_rrkz:
3127 ; CHECK-NEXT: movzbl %dil, %eax
3128 ; CHECK-NEXT: kmovw %eax, %k1
3129 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z}
3131 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3135 define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
3136 ; CHECK-LABEL: test_mask_mul_epu32_rm:
3138 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0
3140 %b = load <16 x i32>, <16 x i32>* %ptr_b
3141 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3145 define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
3146 ; CHECK-LABEL: test_mask_mul_epu32_rmk:
3148 ; CHECK-NEXT: movzbl %sil, %eax
3149 ; CHECK-NEXT: kmovw %eax, %k1
3150 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1}
3151 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3153 %b = load <16 x i32>, <16 x i32>* %ptr_b
3154 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3158 define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
3159 ; CHECK-LABEL: test_mask_mul_epu32_rmkz:
3161 ; CHECK-NEXT: movzbl %sil, %eax
3162 ; CHECK-NEXT: kmovw %eax, %k1
3163 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z}
3165 %b = load <16 x i32>, <16 x i32>* %ptr_b
3166 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3170 define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
3171 ; CHECK-LABEL: test_mask_mul_epu32_rmb:
3173 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0
3175 %q = load i64, i64* %ptr_b
3176 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3177 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3178 %b = bitcast <8 x i64> %b64 to <16 x i32>
3179 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3183 define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
3184 ; CHECK-LABEL: test_mask_mul_epu32_rmbk:
3186 ; CHECK-NEXT: movzbl %sil, %eax
3187 ; CHECK-NEXT: kmovw %eax, %k1
3188 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
3189 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3191 %q = load i64, i64* %ptr_b
3192 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3193 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3194 %b = bitcast <8 x i64> %b64 to <16 x i32>
3195 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3199 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
3200 ; CHECK-LABEL: test_mask_mul_epu32_rmbkz:
3202 ; CHECK-NEXT: movzbl %sil, %eax
3203 ; CHECK-NEXT: kmovw %eax, %k1
3204 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
3206 %q = load i64, i64* %ptr_b
3207 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3208 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3209 %b = bitcast <8 x i64> %b64 to <16 x i32>
3210 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3214 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
3216 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
3217 ; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
3219 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
3221 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
3225 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
3226 ; CHECK-LABEL: test_mask_mullo_epi32_rrk_512:
3228 ; CHECK-NEXT: kmovw %edi, %k1
3229 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1}
3230 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3232 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
3233 ret < 16 x i32> %res
; Tests for the masked 512-bit vpmulld intrinsic: register/register (kz),
; register/memory (plain, k, kz) and memory-broadcast (plain, k, kz) forms.
; Restored from a garbled extraction: stray line numbers removed, closing
; braces and the autogenerated "## BB#0:" / "retq" check lines reinstated.
define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi32_rrkz_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_mullo_epi32_rm_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulld (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi32_rmk_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmulld (%rdi), %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi32_rmkz_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_mullo_epi32_rmb_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulld (%rdi){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi32_rmbk_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Tests for the masked vaddps intrinsic with explicit rounding control:
; zero-masked, merge-masked and unmasked variants across the four static
; rounding modes (rn/rd/ru/rz-sae, rounding args 0-3) plus "current" (4).
; Restored from a garbled extraction: stray line numbers removed, closing
; braces and the autogenerated "## BB#0:" / "retq" check lines reinstated.
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_add_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_add_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; Tests for the masked vsubps intrinsic with explicit rounding control:
; merge-masked and unmasked variants across the four static rounding modes
; plus "current" rounding (arg 4).
; Restored from a garbled extraction: stray line numbers removed, closing
; braces and the autogenerated "## BB#0:" / "retq" check lines reinstated.
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_sub_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vsubps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_sub_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsubps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
; Tests for the masked vdivps intrinsic with explicit rounding control:
; zero-masked, merge-masked and unmasked variants across the four static
; rounding modes plus "current" rounding (arg 4).
; Restored from a garbled extraction: stray line numbers removed, closing
; braces and the autogenerated "## BB#0:" / "retq" check lines reinstated.
define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_div_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_div_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vdivps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vdivps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; Tests for the masked vminps intrinsic: {sae} (arg 8, exceptions suppressed)
; and "current" (arg 4) variants, each in zero-masked, merge-masked and
; unmasked form. Note min/max take no rounding mode, only SAE.
; Restored from a garbled extraction: stray line numbers removed, closing
; braces and the autogenerated "## BB#0:" / "retq" check lines reinstated.
define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_min_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_min_round_ps_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_min_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vminps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vminps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; Tests for the masked vmaxps intrinsic: {sae} (arg 8) and "current" (arg 4)
; variants, each in zero-masked, merge-masked and unmasked form — parallel
; to the vminps tests above.
; Restored from a garbled extraction: stray line numbers removed, closing
; braces and the autogenerated "## BB#0:" / "retq" check lines reinstated.
define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_maskz_max_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_max_round_ps_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; CHECK-LABEL: test_mm512_mask_max_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmaxps %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_sae:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; Tests for the masked scalar vaddss intrinsic with rounding control. The
; i8 mask is narrowed to one bit (andl $1) since only element 0 is masked.
; Restored from a garbled extraction: stray line numbers removed, closing
; braces and the autogenerated "## BB#0:" / "retq" check lines reinstated.
declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
  ret <4 x float> %res
}

define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_ru:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
  ret <4 x float> %res
}

define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_rz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
  ret <4 x float> %res
}

define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_ss_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
  ret <4 x float> %res
}

define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_add_ss_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
  ret <4 x float> %res
}

define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_add_ss_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
  ret <4 x float> %res
}
; Tests for the masked scalar vaddsd intrinsic with rounding control —
; double-precision counterpart of the vaddss tests above.
; Restored from a garbled extraction: stray line numbers removed, closing
; braces and the autogenerated "## BB#0:" / "retq" check lines reinstated.
declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
  ret <2 x double> %res
}

define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_rd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
  ret <2 x double> %res
}

define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_ru:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
  ret <2 x double> %res
}

define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_rz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
  ret <2 x double> %res
}

define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_add_sd_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_add_sd_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
  ret <2 x double> %res
}

define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_add_sd_rn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
  ret <2 x double> %res
}
; Tests for @llvm.x86.avx512.mask.max.ss.round (masked scalar float max).
; Rounding operand: i32 8 lowers with the {sae} annotation, i32 4 ("current")
; lowers to plain vmaxss.  Covers merge-masking (%a2 passthrough),
; zero-masking ({z} with zeroinitializer passthrough), and unmasked (i8 -1).
3999 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
; Merge-masked, {sae}.
4001 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
4002 ; CHECK-LABEL: test_mask_max_ss_sae:
4004 ; CHECK-NEXT: andl $1, %edi
4005 ; CHECK-NEXT: kmovw %edi, %k1
4006 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
4007 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
4009 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
4010 ret <4 x float> %res
; Zero-masked, {sae}.
4013 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4014 ; CHECK-LABEL: test_maskz_max_ss_sae:
4016 ; CHECK-NEXT: andl $1, %edi
4017 ; CHECK-NEXT: kmovw %edi, %k1
4018 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
4020 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
4021 ret <4 x float> %res
; Unmasked, {sae}.
4024 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
4025 ; CHECK-LABEL: test_max_ss_sae:
4027 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0
4029 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
4030 ret <4 x float> %res
; Merge-masked, current rounding mode (no annotation expected).
4033 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
4034 ; CHECK-LABEL: test_mask_max_ss:
4036 ; CHECK-NEXT: andl $1, %edi
4037 ; CHECK-NEXT: kmovw %edi, %k1
4038 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
4039 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
4041 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
4042 ret <4 x float> %res
; Zero-masked, current rounding mode.
4045 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4046 ; CHECK-LABEL: test_maskz_max_ss:
4048 ; CHECK-NEXT: andl $1, %edi
4049 ; CHECK-NEXT: kmovw %edi, %k1
4050 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
4052 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
4053 ret <4 x float> %res
; Unmasked, current rounding mode.
4056 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
4057 ; CHECK-LABEL: test_max_ss:
4059 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
4061 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
4062 ret <4 x float> %res
; Tests for @llvm.x86.avx512.mask.max.sd.round (masked scalar double max).
; Same matrix as the ss variants above: {sae} (i32 8) vs. current mode
; (i32 4), crossed with merge-masked / zero-masked / unmasked (i8 -1).
4064 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
; Merge-masked, {sae}.
4066 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
4067 ; CHECK-LABEL: test_mask_max_sd_sae:
4069 ; CHECK-NEXT: andl $1, %edi
4070 ; CHECK-NEXT: kmovw %edi, %k1
4071 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
4072 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
4074 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
4075 ret <2 x double> %res
; Zero-masked, {sae}.
4078 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
4079 ; CHECK-LABEL: test_maskz_max_sd_sae:
4081 ; CHECK-NEXT: andl $1, %edi
4082 ; CHECK-NEXT: kmovw %edi, %k1
4083 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
4085 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
4086 ret <2 x double> %res
; Unmasked, {sae}.
4089 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
4090 ; CHECK-LABEL: test_max_sd_sae:
4092 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
4094 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
4095 ret <2 x double> %res
; Merge-masked, current rounding mode.
4098 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
4099 ; CHECK-LABEL: test_mask_max_sd:
4101 ; CHECK-NEXT: andl $1, %edi
4102 ; CHECK-NEXT: kmovw %edi, %k1
4103 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
4104 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
4106 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
4107 ret <2 x double> %res
; Zero-masked, current rounding mode.
4110 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
4111 ; CHECK-LABEL: test_maskz_max_sd:
4113 ; CHECK-NEXT: andl $1, %edi
4114 ; CHECK-NEXT: kmovw %edi, %k1
4115 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
4117 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
4118 ret <2 x double> %res
; Unmasked, current rounding mode.
4121 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
4122 ; CHECK-LABEL: test_max_sd:
4124 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
4126 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
4127 ret <2 x double> %res
; Signed int -> scalar FP conversions with an explicit rounding operand.
; i32 3 as the last argument lowers to the {rz-sae} (round-toward-zero)
; annotation on vcvtsi2sd/vcvtsi2ss; the l/q mnemonic suffix tracks the
; 32- vs. 64-bit integer source.
4130 define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
4131 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
4133 ; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
4135 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1]
4136 ret <2 x double> %res
4138 declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
4140 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
4141 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
4143 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
4145 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
4146 ret <2 x double> %res
4148 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
4150 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
4151 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
4153 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
4155 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
4156 ret <4 x float> %res
4158 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
4160 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
4161 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
4163 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
4165 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
4166 ret <4 x float> %res
4168 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; Unsigned int -> float conversions (vcvtusi2ss).  i32 1 as the rounding
; operand lowers to {rd-sae} (round-down); i32 4 is the current mode and
; allows the memory operand to be folded directly (see the _mem variants:
; with rounding the load stays separate, without it the load folds).
4170 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
4171 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
4173 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
4176 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
4177 ret <4 x float> %res
; Memory source + explicit rounding: load is not folded into the convert.
4180 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
4181 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
4183 ; CHECK-NEXT: movl (%rdi), %eax
4184 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
4187 %b = load i32, i32* %ptr
4188 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
4189 ret <4 x float> %res
; Current rounding mode (i32 4): plain vcvtusi2ssl.
4192 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
4193 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
4195 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
4198 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
4199 ret <4 x float> %res
; Memory source, current mode: the load folds into the convert.
4202 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
4203 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
4205 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
4208 %b = load i32, i32* %ptr
4209 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
4210 ret <4 x float> %res
4212 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
; 64-bit unsigned source variants (vcvtusi2ssq).
4214 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
4215 ; CHECK-LABEL: _mm_cvt_roundu64_ss:
4217 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
4220 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1]
4221 ret <4 x float> %res
4224 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
4225 ; CHECK-LABEL: _mm_cvtu64_ss:
4227 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
4230 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1]
4231 ret <4 x float> %res
4233 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
; Unsigned int -> double conversions (vcvtusi2sd).  The 32-bit intrinsic
; takes no rounding operand; the 64-bit one does (i32 1 => {rd-sae},
; i32 4 => current mode).
4235 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
4236 ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
4238 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
4241 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1]
4242 ret <2 x double> %res
4244 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
; NOTE(review): these two test names look swapped — the "_cvtu64_sd" test
; passes the explicit {rd-sae} rounding operand (i32 1) while the
; "_cvt_roundu64_sd" test uses the current mode (i32 4).  Behavior checked
; is still self-consistent; confirm against the original intent upstream.
4246 define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
4247 ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
4249 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
4252 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1]
4253 ret <2 x double> %res
4256 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
4257 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
4259 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
4262 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1]
4263 ret <2 x double> %res
4265 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
; Unmasked integer min/max: an all-ones mask (i8/i16 -1) with a zero
; passthrough should lower to the bare vpmaxsq/vpminud/vpmaxsd instruction
; with no mask register involved.
4267 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
4268 ; CHECK-LABEL: test_vpmaxq:
4270 ; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
4272 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
4273 <8 x i64>zeroinitializer, i8 -1)
4276 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4278 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
4279 ; CHECK-LABEL: test_vpminud:
4281 ; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0
4283 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
4284 <16 x i32>zeroinitializer, i16 -1)
4287 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4289 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
4290 ; CHECK-LABEL: test_vpmaxsd:
4292 ; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
4294 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
4295 <16 x i32>zeroinitializer, i16 -1)
4298 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked signed/unsigned max tests.  Each test issues the intrinsic twice —
; once with the live mask (merge into %x2 via {%k1}) and once unmasked
; (i8/i16 -1) — then adds the two results so both lowerings are kept live
; and checked.  The i8 masks are zero-extended via movzbl before kmovw.
4300 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4301 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
4303 ; CHECK-NEXT: kmovw %edi, %k1
4304 ; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
4305 ; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
4306 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4308 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4309 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4310 %res2 = add <16 x i32> %res, %res1
4311 ret <16 x i32> %res2
4314 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4315 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
4317 ; CHECK-NEXT: movzbl %dil, %eax
4318 ; CHECK-NEXT: kmovw %eax, %k1
4319 ; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1}
4320 ; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
4321 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4323 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4324 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4325 %res2 = add <8 x i64> %res, %res1
4329 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4331 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4332 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
4334 ; CHECK-NEXT: kmovw %edi, %k1
4335 ; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm2 {%k1}
4336 ; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
4337 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4339 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4340 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4341 %res2 = add <16 x i32> %res, %res1
4342 ret <16 x i32> %res2
4345 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4347 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4348 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
4350 ; CHECK-NEXT: movzbl %dil, %eax
4351 ; CHECK-NEXT: kmovw %eax, %k1
4352 ; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1}
4353 ; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
4354 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4356 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4357 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4358 %res2 = add <8 x i64> %res, %res1
; Masked signed/unsigned min tests, same masked-plus-unmasked pattern as
; the max tests above (merge-mask into %x2, then add the unmasked result).
4362 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4364 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4365 ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_512:
4367 ; CHECK-NEXT: kmovw %edi, %k1
4368 ; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1}
4369 ; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm0
4370 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4372 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4373 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4374 %res2 = add <16 x i32> %res, %res1
4375 ret <16 x i32> %res2
4378 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4380 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4381 ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_q_512:
4383 ; CHECK-NEXT: movzbl %dil, %eax
4384 ; CHECK-NEXT: kmovw %eax, %k1
4385 ; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1}
4386 ; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm0
4387 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4389 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4390 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4391 %res2 = add <8 x i64> %res, %res1
4395 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4396 ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_512:
4398 ; CHECK-NEXT: kmovw %edi, %k1
4399 ; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1}
4400 ; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0
4401 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4403 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4404 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4405 %res2 = add <16 x i32> %res, %res1
4406 ret <16 x i32> %res2
4409 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4411 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4412 ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_q_512:
4414 ; CHECK-NEXT: movzbl %dil, %eax
4415 ; CHECK-NEXT: kmovw %eax, %k1
4416 ; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1}
4417 ; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm0
4418 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4420 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4421 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4422 %res2 = add <8 x i64> %res, %res1
; vpermi2var tests (two-source permute, index operand is also the
; passthrough).  The index register %x1/zmm1 is copied to a scratch zmm
; first because vpermi2* overwrites its index/destination operand; masked
; and unmasked results are added to keep both live.  The d-variant also
; exercises folding a memory operand for the second data source.
4426 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4428 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
4429 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
4431 ; CHECK-NEXT: kmovw %esi, %k1
4432 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4433 ; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
4434 ; CHECK-NEXT: vpermi2d %zmm2, %zmm0, %zmm1
4435 ; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
4437 %x2 = load <16 x i32>, <16 x i32>* %x2p
4438 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4439 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
4440 %res2 = add <16 x i32> %res, %res1
4441 ret <16 x i32> %res2
4444 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
4446 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
4447 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
4449 ; CHECK-NEXT: movzbl %dil, %eax
4450 ; CHECK-NEXT: kmovw %eax, %k1
4451 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4452 ; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 {%k1}
4453 ; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1
4454 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0
4456 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
4457 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
4458 %res2 = fadd <8 x double> %res, %res1
4459 ret <8 x double> %res2
; vpermi2var ps/q variants — same masked-vs-unmasked pattern as the d/pd
; tests: copy the index register to a scratch zmm (vpermi2* clobbers it),
; run once merge-masked and once unmasked, and combine with add/fadd.
4462 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
4464 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
4465 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
4467 ; CHECK-NEXT: kmovw %edi, %k1
4468 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4469 ; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 {%k1}
4470 ; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1
4471 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
4473 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
4474 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
4475 %res2 = fadd <16 x float> %res, %res1
4476 ret <16 x float> %res2
4479 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4481 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4482 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
4484 ; CHECK-NEXT: movzbl %dil, %eax
4485 ; CHECK-NEXT: kmovw %eax, %k1
4486 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4487 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1}
4488 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
4489 ; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
4491 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4492 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4493 %res2 = add <8 x i64> %res, %res1
; Zero-masking vpermt2var tests ({%k1} {z}).  The d-variant folds a full
; memory operand; the pd-variant folds a broadcast load ({1to8}) built in
; IR via insertelement + an all-zero shufflevector splat.
4497 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
4499 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
4500 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
4502 ; CHECK-NEXT: kmovw %esi, %k1
4503 ; CHECK-NEXT: vmovaps %zmm1, %zmm2
4504 ; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z}
4505 ; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1
4506 ; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0
4508 %x2 = load <16 x i32>, <16 x i32>* %x2p
4509 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4510 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1)
4511 %res2 = add <16 x i32> %res, %res1
4512 ret <16 x i32> %res2
4515 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
4517 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
4518 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
4520 ; CHECK-NEXT: movzbl %sil, %eax
4521 ; CHECK-NEXT: kmovw %eax, %k1
4522 ; CHECK-NEXT: vmovaps %zmm1, %zmm2
4523 ; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z}
4524 ; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1
4525 ; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0
; Build an 8-way splat of the loaded scalar; this is what lowers to the
; {1to8} embedded-broadcast memory operand above.
4527 %x2s = load double, double* %x2ptr
4528 %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
4529 %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
4530 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
4531 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1)
4532 %res2 = fadd <8 x double> %res, %res1
4533 ret <8 x double> %res2
; Remaining vpermt2var tests: zero-masked ps/q variants and one
; merge-masked d variant.  Pattern as elsewhere: copy the overwritten
; operand to a scratch zmm, run masked + unmasked, combine with add/fadd.
4536 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
4538 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
4539 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
4541 ; CHECK-NEXT: kmovw %edi, %k1
4542 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4543 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z}
4544 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1
4545 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
4547 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
4548 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
4549 %res2 = fadd <16 x float> %res, %res1
4550 ret <16 x float> %res2
4554 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4556 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4557 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
4559 ; CHECK-NEXT: movzbl %dil, %eax
4560 ; CHECK-NEXT: kmovw %eax, %k1
4561 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4562 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z}
4563 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1
4564 ; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
4566 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4567 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4568 %res2 = add <8 x i64> %res, %res1
4572 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Merge-masking ({%k1} without {z}) variant of vpermt2d.
4574 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4575 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
4577 ; CHECK-NEXT: kmovw %edi, %k1
4578 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
4579 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1}
4580 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1
4581 ; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
4583 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4584 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4585 %res2 = add <16 x i32> %res, %res1
4586 ret <16 x i32> %res2
; vscalefpd/vscalefps tests.  Each test pairs a masked call using one
; rounding mode (pd: i32 3 => {rz-sae}; ps: i32 2 => {ru-sae}) with an
; unmasked call using {rn-sae} (i32 0), then adds the results.
4589 declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
4590 define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
4591 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_512:
4593 ; CHECK-NEXT: movzbl %dil, %eax
4594 ; CHECK-NEXT: kmovw %eax, %k1
4595 ; CHECK-NEXT: vscalefpd {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
4596 ; CHECK-NEXT: vscalefpd {rn-sae}, %zmm1, %zmm0, %zmm0
4597 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4599 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
4600 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
4601 %res2 = fadd <8 x double> %res, %res1
4602 ret <8 x double> %res2
4605 declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
4606 define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
4607 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_512:
4609 ; CHECK-NEXT: kmovw %edi, %k1
4610 ; CHECK-NEXT: vscalefps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
4611 ; CHECK-NEXT: vscalefps {rn-sae}, %zmm1, %zmm0, %zmm0
4612 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4614 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
4615 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
4616 %res2 = fadd <16 x float> %res, %res1
4617 ret <16 x float> %res2
; Masked FP unpack-high tests.  The CHECK lines use the shuffle-decoded
; comment form emitted by the printer; masked and unmasked calls are
; combined with fadd so both survive optimization.
4620 declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
4622 define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
4623 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
4625 ; CHECK-NEXT: movzbl %dil, %eax
4626 ; CHECK-NEXT: kmovw %eax, %k1
4627 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
4628 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
4629 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4631 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
4632 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
4633 %res2 = fadd <8 x double> %res, %res1
4634 ret <8 x double> %res2
4637 declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
4639 define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
4640 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
4642 ; CHECK-NEXT: kmovw %edi, %k1
4643 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
4644 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
4645 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4647 %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
4648 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
4649 %res2 = fadd <16 x float> %res, %res1
4650 ret <16 x float> %res2
; Masked FP unpack-low tests — mirror of the unpack-high tests above,
; checking the even-lane interleave pattern instead of the odd lanes.
4653 declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
4655 define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
4656 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
4658 ; CHECK-NEXT: movzbl %dil, %eax
4659 ; CHECK-NEXT: kmovw %eax, %k1
4660 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
4661 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
4662 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4664 %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
4665 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
4666 %res2 = fadd <8 x double> %res, %res1
4667 ret <8 x double> %res2
4670 declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
4672 define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
4673 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
4675 ; CHECK-NEXT: kmovw %edi, %k1
4676 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
4677 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
4678 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4680 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
4681 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
4682 %res2 = fadd <16 x float> %res, %res1
4683 ret <16 x float> %res2
; Masked integer unpack-low (qword).  Exercises three forms in one test:
; merge-masked (into %x2), unmasked (i8 -1), and zero-masked
; (zeroinitializer passthrough with the live mask); all three results
; are summed so each lowering stays observable.
4686 declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
4688 define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4689 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
4691 ; CHECK-NEXT: movzbl %dil, %eax
4692 ; CHECK-NEXT: kmovw %eax, %k1
4693 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
4694 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm3 = k1[0],zmm0[0],k1[2],zmm0[2],k1[4],zmm0[4],k1[6],zmm0[6]
4695 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
4696 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4697 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4699 %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4700 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4701 %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3)
4702 %res3 = add <8 x i64> %res, %res1
4703 %res4 = add <8 x i64> %res2, %res3
; Unpack-high of the odd v8i64 elements via the masked punpckhqd intrinsic.
4707 declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; Merge-masked (%x3) plus unmasked (-1) calls, summed so both lowerings are
; kept live.
; NOTE(review): as with the punpcklqd test above, the shuffle decode prints
; "k1" as a data operand instead of a "{%k1}" destination annotation --
; looks like a mangled capture; regenerate with update_llc_test_checks.py.
4709 define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4710 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
4712 ; CHECK-NEXT: movzbl %dil, %eax
4713 ; CHECK-NEXT: kmovw %eax, %k1
4714 ; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
4715 ; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
4716 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4718 %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4719 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4720 %res2 = add <8 x i64> %res, %res1
; Unpack-high of v16i32 elements via the masked punpckhd intrinsic.
4724 declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; Merge-masked (%x3) plus unmasked (-1) calls, summed with vpaddd.
; NOTE(review): shuffle decode prints "k1" as a data operand here too --
; likely a mangled capture of the "{%k1}" mask annotation; verify by
; regenerating the assertions.
4726 define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4727 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
4729 ; CHECK-NEXT: kmovw %edi, %k1
4730 ; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
4731 ; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
4732 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4734 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4735 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4736 %res2 = add <16 x i32> %res, %res1
4737 ret <16 x i32> %res2

; Unpack-low of v16i32 elements via the masked punpckld intrinsic; same
; merge-masked + unmasked pattern as the punpckhd test above.
4740 declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

4742 define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4743 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
4745 ; CHECK-NEXT: kmovw %edi, %k1
4746 ; CHECK-NEXT: vpunpckldq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
4747 ; CHECK-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
4748 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4750 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4751 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4752 %res2 = add <16 x i32> %res, %res1
4753 ret <16 x i32> %res2
; Truncate v8i64 -> v16i8 (vpmovqb): unmasked (-1), merge-masked (%x2) and
; zero-masked (zeroinitializer passthru) register forms, results summed.
4756 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)

4758 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
4759 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
4761 ; CHECK-NEXT: kmovw %edi, %k1
4762 ; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1}
4763 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
4764 ; CHECK-NEXT: vpmovqb %zmm0, %xmm0
4765 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
4766 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4768 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
4769 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
4770 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
4771 %res3 = add <16 x i8> %res0, %res1
4772 %res4 = add <16 x i8> %res3, %res2
; Memory form of vpmovqb: an unmasked store followed by a masked store.
4776 declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)

4778 define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4779 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
4781 ; CHECK-NEXT: movzbl %sil, %eax
4782 ; CHECK-NEXT: kmovw %eax, %k1
4783 ; CHECK-NEXT: vpmovqb %zmm0, (%rdi)
4784 ; CHECK-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
4786 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4787 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; Signed-saturating truncate v8i64 -> v16i8 (vpmovsqb), register forms.
4791 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)

4793 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
4794 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
4796 ; CHECK-NEXT: kmovw %edi, %k1
4797 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1}
4798 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
4799 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
4800 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
4801 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4803 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
4804 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
4805 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
4806 %res3 = add <16 x i8> %res0, %res1
4807 %res4 = add <16 x i8> %res3, %res2
; Memory form of vpmovsqb: unmasked then masked store.
4811 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)

4813 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4814 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
4816 ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi)
4817 ; CHECK-NEXT: kmovw %esi, %k1
4818 ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1}
4820 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4821 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; Unsigned-saturating truncate v8i64 -> v16i8 (vpmovusqb), register forms.
4825 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)

4827 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
4828 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
4830 ; CHECK-NEXT: kmovw %edi, %k1
4831 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1}
4832 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
4833 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
4834 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
4835 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4837 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
4838 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
4839 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
4840 %res3 = add <16 x i8> %res0, %res1
4841 %res4 = add <16 x i8> %res3, %res2
; Memory form of vpmovusqb: unmasked then masked store.
4845 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)

4847 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4848 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
4850 ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi)
4851 ; CHECK-NEXT: kmovw %esi, %k1
4852 ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1}
4854 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4855 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; Truncate v8i64 -> v8i16 (vpmovqw): unmasked, merge-masked and zero-masked
; register forms, results summed.
4859 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)

4861 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
4862 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
4864 ; CHECK-NEXT: movzbl %dil, %eax
4865 ; CHECK-NEXT: kmovw %eax, %k1
4866 ; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1}
4867 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
4868 ; CHECK-NEXT: vpmovqw %zmm0, %xmm0
4869 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
4870 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
4872 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
4873 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
4874 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
4875 %res3 = add <8 x i16> %res0, %res1
4876 %res4 = add <8 x i16> %res3, %res2
; Memory form of vpmovqw: unmasked then masked store.
4880 declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)

4882 define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4883 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
4885 ; CHECK-NEXT: movzbl %sil, %eax
4886 ; CHECK-NEXT: kmovw %eax, %k1
4887 ; CHECK-NEXT: vpmovqw %zmm0, (%rdi)
4888 ; CHECK-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
4890 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4891 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; Signed-saturating truncate v8i64 -> v8i16 (vpmovsqw), register forms.
4895 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)

4897 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
4898 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
4900 ; CHECK-NEXT: movzbl %dil, %eax
4901 ; CHECK-NEXT: kmovw %eax, %k1
4902 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1}
4903 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
4904 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
4905 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
4906 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
4908 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
4909 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
4910 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
4911 %res3 = add <8 x i16> %res0, %res1
4912 %res4 = add <8 x i16> %res3, %res2
; Memory form of vpmovsqw: unmasked then masked store.
4916 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)

4918 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4919 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
4921 ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi)
4922 ; CHECK-NEXT: kmovw %esi, %k1
4923 ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1}
4925 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4926 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; Unsigned-saturating truncate v8i64 -> v8i16 (vpmovusqw), register forms.
4930 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)

4932 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
4933 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
4935 ; CHECK-NEXT: movzbl %dil, %eax
4936 ; CHECK-NEXT: kmovw %eax, %k1
4937 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1}
4938 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
4939 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
4940 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
4941 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
4943 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
4944 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
4945 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
4946 %res3 = add <8 x i16> %res0, %res1
4947 %res4 = add <8 x i16> %res3, %res2
; Memory form of vpmovusqw: unmasked then masked store.
4951 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)

4953 define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4954 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
4956 ; CHECK-NEXT: vpmovusqw %zmm0, (%rdi)
4957 ; CHECK-NEXT: kmovw %esi, %k1
4958 ; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) {%k1}
4960 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4961 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; Truncate v8i64 -> v8i32 (vpmovqd, ymm destination): unmasked, merge-masked
; and zero-masked register forms, results summed.
4965 declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)

4967 define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
4968 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
4970 ; CHECK-NEXT: movzbl %dil, %eax
4971 ; CHECK-NEXT: kmovw %eax, %k1
4972 ; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1}
4973 ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
4974 ; CHECK-NEXT: vpmovqd %zmm0, %ymm0
4975 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
4976 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
4978 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
4979 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
4980 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
4981 %res3 = add <8 x i32> %res0, %res1
4982 %res4 = add <8 x i32> %res3, %res2
; Memory form of vpmovqd: unmasked then masked store.
4986 declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)

4988 define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
4989 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
4991 ; CHECK-NEXT: movzbl %sil, %eax
4992 ; CHECK-NEXT: kmovw %eax, %k1
4993 ; CHECK-NEXT: vpmovqd %zmm0, (%rdi)
4994 ; CHECK-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
4996 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
4997 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; Signed-saturating truncate v8i64 -> v8i32 (vpmovsqd), register forms.
5001 declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)

5003 define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
5004 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
5006 ; CHECK-NEXT: movzbl %dil, %eax
5007 ; CHECK-NEXT: kmovw %eax, %k1
5008 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm1 {%k1}
5009 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
5010 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
5011 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
5012 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
5014 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
5015 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
5016 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
5017 %res3 = add <8 x i32> %res0, %res1
5018 %res4 = add <8 x i32> %res3, %res2
; Memory form of vpmovsqd: unmasked then masked store.
5022 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)

5024 define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
5025 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
5027 ; CHECK-NEXT: vpmovsqd %zmm0, (%rdi)
5028 ; CHECK-NEXT: kmovw %esi, %k1
5029 ; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) {%k1}
5031 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
5032 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; Unsigned-saturating truncate v8i64 -> v8i32 (vpmovusqd), register forms.
5036 declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)

5038 define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
5039 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
5041 ; CHECK-NEXT: movzbl %dil, %eax
5042 ; CHECK-NEXT: kmovw %eax, %k1
5043 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm1 {%k1}
5044 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
5045 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
5046 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
5047 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
5049 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
5050 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
5051 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
5052 %res3 = add <8 x i32> %res0, %res1
5053 %res4 = add <8 x i32> %res3, %res2
; Memory form of vpmovusqd: unmasked then masked store.
5057 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)

5059 define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
5060 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
5062 ; CHECK-NEXT: vpmovusqd %zmm0, (%rdi)
5063 ; CHECK-NEXT: kmovw %esi, %k1
5064 ; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) {%k1}
5066 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
5067 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; Truncate v16i32 -> v16i8 (vpmovdb): unmasked, merge-masked and zero-masked
; register forms, results summed.
5071 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)

5073 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
5074 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
5076 ; CHECK-NEXT: kmovw %edi, %k1
5077 ; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1}
5078 ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
5079 ; CHECK-NEXT: vpmovdb %zmm0, %xmm0
5080 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
5081 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5083 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
5084 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
5085 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
5086 %res3 = add <16 x i8> %res0, %res1
5087 %res4 = add <16 x i8> %res3, %res2
; Memory form of vpmovdb: unmasked then masked store.
5091 declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)

5093 define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5094 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
5096 ; CHECK-NEXT: kmovw %esi, %k1
5097 ; CHECK-NEXT: vpmovdb %zmm0, (%rdi)
5098 ; CHECK-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
5100 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5101 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; Signed-saturating truncate v16i32 -> v16i8 (vpmovsdb), register forms.
5105 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)

5107 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
5108 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
5110 ; CHECK-NEXT: kmovw %edi, %k1
5111 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1}
5112 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
5113 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
5114 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
5115 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5117 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
5118 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
5119 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
5120 %res3 = add <16 x i8> %res0, %res1
5121 %res4 = add <16 x i8> %res3, %res2
; Memory form of vpmovsdb: unmasked then masked store.
5125 declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)

5127 define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5128 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
5130 ; CHECK-NEXT: vpmovsdb %zmm0, (%rdi)
5131 ; CHECK-NEXT: kmovw %esi, %k1
5132 ; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) {%k1}
5134 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5135 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; Unsigned-saturating truncate v16i32 -> v16i8 (vpmovusdb), register forms.
5139 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)

5141 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
5142 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
5144 ; CHECK-NEXT: kmovw %edi, %k1
5145 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1}
5146 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
5147 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
5148 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
5149 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5151 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
5152 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
5153 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
5154 %res3 = add <16 x i8> %res0, %res1
5155 %res4 = add <16 x i8> %res3, %res2
; Memory form of vpmovusdb: unmasked then masked store.
5159 declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)

5161 define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5162 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
5164 ; CHECK-NEXT: vpmovusdb %zmm0, (%rdi)
5165 ; CHECK-NEXT: kmovw %esi, %k1
5166 ; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) {%k1}
5168 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5169 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; Truncate v16i32 -> v16i16 (vpmovdw, ymm destination): unmasked,
; merge-masked and zero-masked register forms, results summed.
5173 declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)

5175 define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
5176 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
5178 ; CHECK-NEXT: kmovw %edi, %k1
5179 ; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1}
5180 ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
5181 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0
5182 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
5183 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
5185 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
5186 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
5187 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
5188 %res3 = add <16 x i16> %res0, %res1
5189 %res4 = add <16 x i16> %res3, %res2
5190 ret <16 x i16> %res4
; Memory form of vpmovdw: unmasked then masked store.
5193 declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)

5195 define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5196 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
5198 ; CHECK-NEXT: kmovw %esi, %k1
5199 ; CHECK-NEXT: vpmovdw %zmm0, (%rdi)
5200 ; CHECK-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
5202 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5203 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; Signed-saturating truncate v16i32 -> v16i16 (vpmovsdw), register forms.
5207 declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)

5209 define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
5210 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
5212 ; CHECK-NEXT: kmovw %edi, %k1
5213 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1}
5214 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
5215 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
5216 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
5217 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
5219 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
5220 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
5221 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
5222 %res3 = add <16 x i16> %res0, %res1
5223 %res4 = add <16 x i16> %res3, %res2
5224 ret <16 x i16> %res4
; Memory form of vpmovsdw: unmasked then masked store.
5227 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)

5229 define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5230 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
5232 ; CHECK-NEXT: vpmovsdw %zmm0, (%rdi)
5233 ; CHECK-NEXT: kmovw %esi, %k1
5234 ; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) {%k1}
5236 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5237 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; Unsigned-saturating truncate v16i32 -> v16i16 (vpmovusdw), register forms.
5241 declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)

5243 define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
5244 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
5246 ; CHECK-NEXT: kmovw %edi, %k1
5247 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1}
5248 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
5249 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
5250 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
5251 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
5253 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
5254 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
5255 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
5256 %res3 = add <16 x i16> %res0, %res1
5257 %res4 = add <16 x i16> %res3, %res2
5258 ret <16 x i16> %res4
; Memory form of vpmovusdw: unmasked then masked store.
5261 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)

5263 define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
5264 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
5266 ; CHECK-NEXT: vpmovusdw %zmm0, (%rdi)
5267 ; CHECK-NEXT: kmovw %esi, %k1
5268 ; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) {%k1}
5270 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
5271 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; Masked signed-int to double conversion (vcvtdq2pd); no rounding operand.
; Masked (%x2) and unmasked (-1) calls, results summed.
5275 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)

5277 define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
5278 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
5280 ; CHECK-NEXT: movzbl %dil, %eax
5281 ; CHECK-NEXT: kmovw %eax, %k1
5282 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
5283 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
5284 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5286 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
5287 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
5288 %res2 = fadd <8 x double> %res, %res1
5289 ret <8 x double> %res2
; Masked signed-int to float conversion with rounding operand: masked call
; uses mode 4 (current direction) and unmasked uses mode 0, printed {rn-sae}.
5292 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)

5294 define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
5295 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
5297 ; CHECK-NEXT: kmovw %edi, %k1
5298 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1}
5299 ; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0
5300 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5302 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
5303 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
5304 %res2 = fadd <16 x float> %res, %res1
5305 ret <16 x float> %res2
; Masked double -> signed-int conversion with rounding operand: masked call
; uses mode 4 (current direction), unmasked uses mode 0 ({rn-sae}).
5308 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)

5310 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
5311 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512:
5313 ; CHECK-NEXT: movzbl %dil, %eax
5314 ; CHECK-NEXT: kmovw %eax, %k1
5315 ; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1}
5316 ; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0
5317 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
5319 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
5320 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
5321 %res2 = add <8 x i32> %res, %res1
; Masked double -> float conversion: masked call uses mode 4, unmasked uses
; mode 2 (round toward +inf), printed {ru-sae}.
5325 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)

5327 define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) {
5328 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512:
5330 ; CHECK-NEXT: movzbl %dil, %eax
5331 ; CHECK-NEXT: kmovw %eax, %k1
5332 ; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1}
5333 ; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0
5334 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
5336 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
5337 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
5338 %res2 = fadd <8 x float> %res, %res1
5339 ret <8 x float> %res2
; Masked double -> unsigned-int conversion: masked call uses rounding mode 2
; ({ru-sae}), unmasked uses mode 0 ({rn-sae}).
5342 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)

5344 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
5345 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512:
5347 ; CHECK-NEXT: movzbl %dil, %eax
5348 ; CHECK-NEXT: kmovw %eax, %k1
5349 ; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1}
5350 ; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0
5351 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
5353 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
5354 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
5355 %res2 = add <8 x i32> %res, %res1
; Masked float -> signed-int conversion: masked call uses rounding mode 2
; ({ru-sae}), unmasked uses mode 0 ({rn-sae}).
5359 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)

5361 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
5362 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512:
5364 ; CHECK-NEXT: kmovw %edi, %k1
5365 ; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1}
5366 ; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0
5367 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
5369 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
5370 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
5371 %res2 = add <16 x i32> %res, %res1
5372 ret <16 x i32> %res2
; Masked float -> double conversion: masked call uses mode 4 (current
; direction), unmasked uses mode 8, printed as the {sae} form.
5375 declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)

5377 define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) {
5378 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512:
5380 ; CHECK-NEXT: movzbl %dil, %eax
5381 ; CHECK-NEXT: kmovw %eax, %k1
5382 ; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1}
5383 ; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0
5384 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5386 %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
5387 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
5388 %res2 = fadd <8 x double> %res, %res1
5389 ret <8 x double> %res2
; Masked float -> unsigned-int conversion: masked call uses rounding mode 2
; ({ru-sae}), unmasked uses mode 0 ({rn-sae}).
5392 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)

5394 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
5395 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512:
5397 ; CHECK-NEXT: kmovw %edi, %k1
5398 ; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1}
5399 ; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0
5400 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
5402 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
5403 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
5404 %res2 = add <16 x i32> %res, %res1
5405 ret <16 x i32> %res2
; Masked truncating double -> signed-int conversion: masked call uses mode 4,
; unmasked uses mode 8 ({sae}).
5408 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)

5410 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
5411 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
5413 ; CHECK-NEXT: movzbl %dil, %eax
5414 ; CHECK-NEXT: kmovw %eax, %k1
5415 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
5416 ; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
5417 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
5419 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
5420 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
5421 %res2 = add <8 x i32> %res, %res1
; Masked unsigned-int to double conversion (vcvtudq2pd); no rounding operand.
; Masked (%x2) and unmasked (-1) calls, results summed.
5425 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)

5427 define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
5428 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
5430 ; CHECK-NEXT: movzbl %dil, %eax
5431 ; CHECK-NEXT: kmovw %eax, %k1
5432 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
5433 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
5434 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5436 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
5437 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
5438 %res2 = fadd <8 x double> %res, %res1
5439 ret <8 x double> %res2
; More conversion tests following the same masked-plus-unmasked pattern.
; Rounding-mode operand 4 selects current rounding; 8 selects round-to-nearest
; with exceptions suppressed, which lowers to the {sae}/{rn-sae} forms.
5443 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
5445 define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
5446 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
5448 ; CHECK-NEXT: kmovw %edi, %k1
5449 ; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1}
5450 ; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0
5451 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5453 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
5454 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
5455 %res2 = fadd <16 x float> %res, %res1
5456 ret <16 x float> %res2
5459 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
5461 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
5462 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
5464 ; CHECK-NEXT: movzbl %dil, %eax
5465 ; CHECK-NEXT: kmovw %eax, %k1
5466 ; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
5467 ; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
5468 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
5470 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
5471 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
5472 %res2 = add <8 x i32> %res, %res1
5476 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
5478 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
5479 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
5481 ; CHECK-NEXT: kmovw %edi, %k1
5482 ; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
5483 ; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
5484 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
5486 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
5487 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
5488 %res2 = add <16 x i32> %res, %res1
5489 ret <16 x i32> %res2
5492 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
5494 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
5495 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
5497 ; CHECK-NEXT: kmovw %edi, %k1
5498 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
5499 ; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
5500 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
5502 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
5503 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
5504 %res2 = add <16 x i32> %res, %res1
5505 ret <16 x i32> %res2
; Scalar SCALEF tests. Only bit 0 of the i8 mask is meaningful for scalar ops,
; hence the `andl $1, %edi` in the expected output. Masked (rounding 4) and
; unmasked (rounding 8, {rn-sae}) results are summed.
5509 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
5510 define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
5511 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss:
5513 ; CHECK-NEXT: andl $1, %edi
5514 ; CHECK-NEXT: kmovw %edi, %k1
5515 ; CHECK-NEXT: vscalefss %xmm1, %xmm0, %xmm2 {%k1}
5516 ; CHECK-NEXT: vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
5517 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
5519 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
5520 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
5521 %res2 = fadd <4 x float> %res, %res1
5522 ret <4 x float> %res2
5525 declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
5526 define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
5527 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd:
5529 ; CHECK-NEXT: andl $1, %edi
5530 ; CHECK-NEXT: kmovw %edi, %k1
5531 ; CHECK-NEXT: vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
5532 ; CHECK-NEXT: vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
5533 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
5535 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
5536 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
5537 %res2 = fadd <2 x double> %res, %res1
5538 ret <2 x double> %res2
; Scalar GETEXP tests covering four variants per element type: masked with a
; passthru operand, masked with {sae}, zero-masked ({z}), and fully unmasked.
; All four results are folded together with fadd so each encoding is asserted.
5541 declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
5543 define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
5544 ; CHECK-LABEL: test_getexp_ss:
5546 ; CHECK-NEXT: andl $1, %edi
5547 ; CHECK-NEXT: kmovw %edi, %k1
5548 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
5549 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
5550 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
5551 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
5552 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
5553 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
5554 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
5555 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
5557 %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
5558 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
5559 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
5560 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
5562 %res.1 = fadd <4 x float> %res0, %res1
5563 %res.2 = fadd <4 x float> %res2, %res3
5564 %res = fadd <4 x float> %res.1, %res.2
5565 ret <4 x float> %res
; Double-precision variant; note the unmasked call here uses rounding mode 4
; (current rounding) unlike the ss test, yielding the non-{sae} vgetexpsd.
5568 declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
5570 define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
5571 ; CHECK-LABEL: test_getexp_sd:
5573 ; CHECK-NEXT: andl $1, %edi
5574 ; CHECK-NEXT: kmovw %edi, %k1
5575 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
5576 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
5577 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
5578 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
5579 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
5580 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
5581 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
5582 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
5584 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
5585 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
5586 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
5587 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
5589 %res.1 = fadd <2 x double> %res0, %res1
5590 %res.2 = fadd <2 x double> %res2, %res3
5591 %res = fadd <2 x double> %res.1, %res.2
5592 ret <2 x double> %res
; Scalar compare-to-mask tests. The intrinsic returns the 1-bit compare result
; widened to i8; the shlb/sarb pair in the expected output sign-extends that
; low bit across %al. The *_all variants combine four predicates (2=le,
; 3=unord, 4=neq, 5=nlt) with or/and to exercise mask-register logic ops.
5595 declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
5597 define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
5598 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
5600 ; CHECK-NEXT: andl $1, %edi
5601 ; CHECK-NEXT: kmovw %edi, %k1
5602 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
5603 ; CHECK-NEXT: kmovw %k0, %eax
5604 ; CHECK-NEXT: shlb $7, %al
5605 ; CHECK-NEXT: sarb $7, %al
5608 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
5612 define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
5613 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
5615 ; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0
5616 ; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1
5617 ; CHECK-NEXT: korw %k0, %k1, %k0
5618 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1
5619 ; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2
5620 ; CHECK-NEXT: korw %k1, %k2, %k1
5621 ; CHECK-NEXT: andl $1, %edi
5622 ; CHECK-NEXT: kmovw %edi, %k2
5623 ; CHECK-NEXT: kandw %k2, %k1, %k1
5624 ; CHECK-NEXT: korw %k1, %k0, %k0
5625 ; CHECK-NEXT: kmovw %k0, %eax
5626 ; CHECK-NEXT: shlb $7, %al
5627 ; CHECK-NEXT: sarb $7, %al
5630 %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
5631 %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
5632 %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
5633 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
5635 %res11 = or i8 %res1, %res2
5636 %res12 = or i8 %res3, %res4
5637 %res13 = or i8 %res11, %res12
; Single-precision counterparts; the *_all form here combines predicates with
; `and`, so the lowering uses kandw rather than korw chains.
5641 declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
5643 define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
5644 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
5646 ; CHECK-NEXT: andl $1, %edi
5647 ; CHECK-NEXT: kmovw %edi, %k1
5648 ; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
5649 ; CHECK-NEXT: kmovw %k0, %eax
5650 ; CHECK-NEXT: shlb $7, %al
5651 ; CHECK-NEXT: sarb $7, %al
5654 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
5659 define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
5660 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
5662 ; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1
5663 ; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1}
5664 ; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k1
5665 ; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
5666 ; CHECK-NEXT: andl $1, %edi
5667 ; CHECK-NEXT: kmovw %edi, %k2
5668 ; CHECK-NEXT: kandw %k2, %k1, %k1
5669 ; CHECK-NEXT: kandw %k1, %k0, %k0
5670 ; CHECK-NEXT: kmovw %k0, %eax
5671 ; CHECK-NEXT: shlb $7, %al
5672 ; CHECK-NEXT: sarb $7, %al
5674 %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
5675 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
5676 %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
5677 %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
5679 %res11 = and i8 %res1, %res2
5680 %res12 = and i8 %res3, %res4
5681 %res13 = and i8 %res11, %res12
; 128-bit-lane shuffle tests (vshuff32x4/f64x2, vshufi32x4/i64x2) with
; immediate 22. Masked and unmasked calls (and a zero-masked one for f64x2)
; are combined so each write-mask form is asserted; the shuffle pattern itself
; is asserted via the autogenerated element-index comments.
5685 declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
5687 define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
5688 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
5690 ; CHECK-NEXT: kmovw %edi, %k1
5691 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5692 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5693 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
5695 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
5696 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
5697 %res2 = fadd <16 x float> %res, %res1
5698 ret <16 x float> %res2
5701 declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
5703 define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
5704 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
5706 ; CHECK-NEXT: movzbl %dil, %eax
5707 ; CHECK-NEXT: kmovw %eax, %k1
5708 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5709 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5710 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5711 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
5712 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
5714 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
5715 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
5716 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
5718 %res3 = fadd <8 x double> %res, %res1
5719 %res4 = fadd <8 x double> %res3, %res2
5720 ret <8 x double> %res4
5723 declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
5725 define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
5726 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
5728 ; CHECK-NEXT: kmovw %edi, %k1
5729 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5730 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5731 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
5733 %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
5734 %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
5735 %res2 = add <16 x i32> %res, %res1
5736 ret <16 x i32> %res2
5739 declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
5741 define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
5742 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
5744 ; CHECK-NEXT: movzbl %dil, %eax
5745 ; CHECK-NEXT: kmovw %eax, %k1
5746 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5747 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5748 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
5750 %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
5751 %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
5752 %res2 = add <8 x i64> %res, %res1
; GETMANT tests, vector (pd/ps with immediate 11) and scalar (sd/ss).
; Vector forms pair a masked current-rounding call with an unmasked {sae}
; call; scalar forms additionally cover zero-masking and no-mask encodings.
5756 declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
5758 define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
5759 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512:
5761 ; CHECK-NEXT: movzbl %dil, %eax
5762 ; CHECK-NEXT: kmovw %eax, %k1
5763 ; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1}
5764 ; CHECK-NEXT: vgetmantpd $11, {sae}, %zmm0, %zmm0
5765 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5767 %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
5768 %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
5769 %res2 = fadd <8 x double> %res, %res1
5770 ret <8 x double> %res2
5773 declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
5775 define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
5776 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512:
5778 ; CHECK-NEXT: kmovw %edi, %k1
5779 ; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1}
5780 ; CHECK-NEXT: vgetmantps $11, {sae}, %zmm0, %zmm0
5781 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5783 %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
5784 %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
5785 %res2 = fadd <16 x float> %res, %res1
5786 ret <16 x float> %res2
5789 declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)
5791 define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5792 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
5794 ; CHECK-NEXT: andl $1, %edi
5795 ; CHECK-NEXT: kmovw %edi, %k1
5796 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
5797 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
5798 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
5799 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
5800 ; CHECK-NEXT: vgetmantsd $11, {sae}, %xmm1, %xmm0, %xmm2 {%k1}
5801 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
5802 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
5803 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
5805 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
5806 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
5807 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8)
5808 %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4)
5809 %res11 = fadd <2 x double> %res, %res1
5810 %res12 = fadd <2 x double> %res2, %res3
5811 %res13 = fadd <2 x double> %res11, %res12
5812 ret <2 x double> %res13
5815 declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)
5817 define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5818 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
5820 ; CHECK-NEXT: andl $1, %edi
5821 ; CHECK-NEXT: kmovw %edi, %k1
5822 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
5823 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
5824 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4
5825 ; CHECK-NEXT: vgetmantss $11, {sae}, %xmm1, %xmm0, %xmm0
5826 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
5827 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
5828 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
5830 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
5831 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
5832 %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8)
5833 %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4)
5834 %res11 = fadd <4 x float> %res, %res1
5835 %res12 = fadd <4 x float> %res2, %res3
5836 %res13 = fadd <4 x float> %res11, %res12
5837 ret <4 x float> %res13
; Element-level VSHUFPD/VSHUFPS tests with immediate 22; masked, unmasked,
; and (pd only) zero-masked results are summed. Note the autogenerated
; shuffle comments here name k1 as an operand, which reflects how the
; script rendered the masked form at generation time.
5840 declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
5842 define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
5843 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
5845 ; CHECK-NEXT: movzbl %dil, %eax
5846 ; CHECK-NEXT: kmovw %eax, %k1
5847 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 = zmm2[0],k1[1],zmm2[3],k1[2],zmm2[5],k1[4],zmm2[6],k1[6]
5848 ; CHECK-NEXT: vshufpd {{.*#+}} zmm3 = k1[0],zmm0[1],k1[3],zmm0[2],k1[5],zmm0[4],k1[6],zmm0[6]
5849 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
5850 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
5851 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
5853 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
5854 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
5855 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
5857 %res3 = fadd <8 x double> %res, %res1
5858 %res4 = fadd <8 x double> %res3, %res2
5859 ret <8 x double> %res4
5862 declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
5864 define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
5865 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
5867 ; CHECK-NEXT: kmovw %edi, %k1
5868 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm2[2,1],k1[1,0],zmm2[6,5],k1[5,4],zmm2[10,9],k1[9,8],zmm2[14,13],k1[13,12]
5869 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
5870 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
5872 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
5873 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
5874 %res2 = fadd <16 x float> %res, %res1
5875 ret <16 x float> %res2
; VPERMILPD/PS tests: the immediate forms (vpermil.*) with control 22 and the
; variable forms (vpermilvar.*) with a vector control. Each exercises masked,
; zero-masked, and unmasked variants, folded together with fadd.
5878 declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)
5880 define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
5881 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
5883 ; CHECK-NEXT: movzbl %dil, %eax
5884 ; CHECK-NEXT: kmovw %eax, %k1
5885 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 = zmm1[0,1,3,2,5,4,6,6]
5886 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = k1[0,1,3,2,5,4,6,6]
5887 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,3,2,5,4,6,6]
5888 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
5889 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5891 %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
5892 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
5893 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
5894 %res3 = fadd <8 x double> %res, %res1
5895 %res4 = fadd <8 x double> %res3, %res2
5896 ret <8 x double> %res4
5899 declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)
5901 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
5902 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
5904 ; CHECK-NEXT: kmovw %edi, %k1
5905 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5906 ; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = k1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5907 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5908 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
5909 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5911 %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
5912 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
5913 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
5914 %res3 = fadd <16 x float> %res, %res1
5915 %res4 = fadd <16 x float> %res3, %res2
5916 ret <16 x float> %res4
5919 declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
5921 define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
5922 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
5924 ; CHECK-NEXT: movzbl %dil, %eax
5925 ; CHECK-NEXT: kmovw %eax, %k1
5926 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
5927 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
5928 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
5929 ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
5930 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
5932 %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
5933 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
5934 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
5935 %res3 = fadd <8 x double> %res, %res1
5936 %res4 = fadd <8 x double> %res2, %res3
5937 ret <8 x double> %res4
5940 declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
5942 define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
5943 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
5945 ; CHECK-NEXT: kmovw %edi, %k1
5946 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
5947 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
5948 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
5949 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
5950 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
5952 %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
5953 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
5954 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
5955 %res3 = fadd <16 x float> %res, %res1
5956 %res4 = fadd <16 x float> %res2, %res3
5957 ret <16 x float> %res4
; Subvector insert tests (vinsertf32x4/i32x4 at xmm granularity, and
; vinsertf64x4/i64x4 at ymm granularity), each inserting at position 1 with
; masked, unmasked, and zero-masked calls folded together.
5960 declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8)
5962 define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) {
5963 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
5965 ; CHECK-NEXT: kmovw %edi, %k1
5966 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
5967 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
5968 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
5969 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
5970 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
5972 %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 %x4)
5973 %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 -1)
5974 %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i8 %x4)
5975 %res3 = fadd <16 x float> %res, %res1
5976 %res4 = fadd <16 x float> %res2, %res3
5977 ret <16 x float> %res4
5980 declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i8)
5982 define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i8 %x4) {
5983 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
5985 ; CHECK-NEXT: kmovw %edi, %k1
5986 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
5987 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
5988 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
5989 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
5990 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
5992 %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 %x4)
5993 %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 -1)
5994 %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i8 %x4)
5995 %res3 = add <16 x i32> %res, %res1
5996 %res4 = add <16 x i32> %res2, %res3
5997 ret <16 x i32> %res4
6000 declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
6002 define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
6003 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
6005 ; CHECK-NEXT: movzbl %dil, %eax
6006 ; CHECK-NEXT: kmovw %eax, %k1
6007 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
6008 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
6009 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
6010 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
6011 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
6013 %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
6014 %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
6015 %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
6016 %res3 = fadd <8 x double> %res, %res1
6017 %res4 = fadd <8 x double> %res2, %res3
6018 ret <8 x double> %res4
6021 declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
6023 define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
6024 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
6026 ; CHECK-NEXT: movzbl %dil, %eax
6027 ; CHECK-NEXT: kmovw %eax, %k1
6028 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
6029 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
6030 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
6031 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
6032 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
6034 %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
6035 %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
6036 %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
6037 %res3 = add <8 x i64> %res, %res1
6038 %res4 = add <8 x i64> %res2, %res3
; Scalar conversion-with-rounding tests: ss->sd (masked current-rounding plus
; unmasked {sae}) and sd->ss (masked {rz-sae}, rounding mode 3, plus unmasked
; {rn-sae}); results summed so both encodings are asserted.
6042 declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float>, <4 x float>, <2 x double>, i8, i32)
6044 define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<4 x float> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
6045 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
6047 ; CHECK-NEXT: andl $1, %edi
6048 ; CHECK-NEXT: kmovw %edi, %k1
6049 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
6050 ; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
6051 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
6053 %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
6054 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
6055 %res2 = fadd <2 x double> %res, %res1
6056 ret <2 x double> %res2
6059 declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double>, <2 x double>, <4 x float>, i8, i32)
6061 define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
6062 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
6064 ; CHECK-NEXT: andl $1, %edi
6065 ; CHECK-NEXT: kmovw %edi, %k1
6066 ; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
6067 ; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
6068 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
6070 %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
6071 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
6072 %res2 = fadd <4 x float> %res, %res1
6073 ret <4 x float> %res2
6076 declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
6078 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
6079 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
6081 ; CHECK-NEXT: kmovw %edi, %k1
6082 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6083 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
6084 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
6085 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
6087 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
6088 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
6089 %res2 = add <16 x i32> %res, %res1
6090 ret <16 x i32> %res2
6093 declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
6095 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
6096 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
6098 ; CHECK-NEXT: kmovw %edi, %k1
6099 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6100 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
6101 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
6102 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
6104 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
6105 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
6106 %res2 = add <16 x i32> %res, %res1
6107 ret <16 x i32> %res2
6110 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
6112 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
6113 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
6115 ; CHECK-NEXT: movzbl %dil, %eax
6116 ; CHECK-NEXT: kmovw %eax, %k1
6117 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6118 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
6119 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
6120 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
6122 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
6123 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
6124 %res2 = add <8 x i64> %res, %res1
6128 declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
6130 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
6131 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
6133 ; CHECK-NEXT: movzbl %dil, %eax
6134 ; CHECK-NEXT: kmovw %eax, %k1
6135 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6136 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
6137 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
6138 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
6140 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
6141 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
6142 %res2 = add <8 x i64> %res, %res1
6146 declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)
6148 define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
6149 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512:
6151 ; CHECK-NEXT: kmovw %edi, %k1
6152 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
6153 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
6154 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
6155 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
6156 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
6158 %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
6159 %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
6160 %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
6161 %res3 = fadd <16 x float> %res, %res1
6162 %res4 = fadd <16 x float> %res2, %res3
6163 ret <16 x float> %res4
6166 declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)
6168 define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
6169 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512:
6171 ; CHECK-NEXT: kmovw %edi, %k1
6172 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
6173 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
6174 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
6175 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
6176 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
6178 %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
6179 %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
6180 %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
6181 %res3 = fadd <16 x float> %res, %res1
6182 %res4 = fadd <16 x float> %res2, %res3
6183 ret <16 x float> %res4
6186 declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)
6188 define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
6189 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_512:
6191 ; CHECK-NEXT: movzbl %dil, %eax
6192 ; CHECK-NEXT: kmovw %eax, %k1
6193 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6]
6194 ; CHECK-NEXT: vmovddup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6]
6195 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
6196 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
6197 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
6199 %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
6200 %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
6201 %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
6202 %res3 = fadd <8 x double> %res, %res1
6203 %res4 = fadd <8 x double> %res2, %res3
6204 ret <8 x double> %res4
6207 define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
6208 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
6210 ; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
6211 ; CHECK-NEXT: sete %al
6212 ; CHECK-NEXT: movzbl %al, %eax
6214 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
6218 define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
6219 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
6221 ; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
6222 ; CHECK-NEXT: sete %al
6223 ; CHECK-NEXT: movzbl %al, %eax
6225 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
6229 define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
6230 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
6232 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
6233 ; CHECK-NEXT: sete %al
6234 ; CHECK-NEXT: movzbl %al, %eax
6236 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
6240 define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
6241 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
6243 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
6244 ; CHECK-NEXT: sete %al
6245 ; CHECK-NEXT: movzbl %al, %eax
6247 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
6251 define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
6252 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
6254 ; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
6255 ; CHECK-NEXT: sbbl %eax, %eax
6256 ; CHECK-NEXT: andl $1, %eax
6258 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
6262 define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
6263 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
6265 ; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
6266 ; CHECK-NEXT: sbbl %eax, %eax
6267 ; CHECK-NEXT: andl $1, %eax
6269 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
6273 define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
6274 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
6276 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
6277 ; CHECK-NEXT: sbbl %eax, %eax
6278 ; CHECK-NEXT: andl $1, %eax
6280 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
6284 define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
6285 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
6287 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
6288 ; CHECK-NEXT: sbbl %eax, %eax
6289 ; CHECK-NEXT: andl $1, %eax
6291 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
6295 declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
6297 define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
6298 ; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
6300 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
6301 ; CHECK-NEXT: sbbl %eax, %eax
6302 ; CHECK-NEXT: andl $1, %eax
6304 %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
6308 declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
6309 declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)
6311 define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6312 ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrk:
6314 ; CHECK-NEXT: andl $1, %edi
6315 ; CHECK-NEXT: kmovw %edi, %k1
6316 ; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
6317 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
6319 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
6320 ret <4 x float> %res
6323 define <4 x float>@test_int_x86_avx512_mask_move_ss_rrkz(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
6324 ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrkz:
6326 ; CHECK-NEXT: andl $1, %edi
6327 ; CHECK-NEXT: kmovw %edi, %k1
6328 ; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
6330 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x2)
6331 ret <4 x float> %res
6334 define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
6335 ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rr:
6337 ; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0
6339 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 -1)
6340 ret <4 x float> %res
6343 declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)
6344 define <2 x double>@test_int_x86_avx512_mask_move_sd_rr(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
6345 ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rr:
6347 ; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0
6349 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 -1)
6350 ret <2 x double> %res
6353 define <2 x double>@test_int_x86_avx512_mask_move_sd_rrkz(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
6354 ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrkz:
6356 ; CHECK-NEXT: andl $1, %edi
6357 ; CHECK-NEXT: kmovw %edi, %k1
6358 ; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
6360 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 %x2)
6361 ret <2 x double> %res
6364 define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6365 ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrk:
6367 ; CHECK-NEXT: andl $1, %edi
6368 ; CHECK-NEXT: kmovw %edi, %k1
6369 ; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
6370 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
6372 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
6373 ret <2 x double> %res
6376 declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)
6378 define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
6379 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
6380 ; CHECK: kmovw %edi, %k1
6381 ; CHECK: vshuff32x4 $0, %zmm0, %zmm0, %zmm2 {%k1} {z}
6382 ; CHECK: vshuff32x4 $0, %zmm0, %zmm0, %zmm1 {%k1}
6383 ; CHECK: vshuff32x4 $0, %zmm0, %zmm0, %zmm0
6384 ; CHECK: vaddps %zmm1, %zmm0, %zmm0
6385 ; CHECK: vaddps %zmm0, %zmm2, %zmm0
6387 %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
6388 %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
6389 %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
6390 %res4 = fadd <16 x float> %res1, %res2
6391 %res5 = fadd <16 x float> %res3, %res4
6392 ret <16 x float> %res5
6395 declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)
6397 define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
6398 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
6399 ; CHECK: kmovw %eax, %k1
6400 ; CHECK: vshuff64x2 $68, %zmm0, %zmm0, %zmm2 {%k1} {z}
6401 ; CHECK: vshuff64x2 $68, %zmm0, %zmm0, %zmm1 {%k1}
6402 ; CHECK: vshuff64x2 $68, %zmm0, %zmm0, %zmm0
6403 ; CHECK: vaddpd %zmm1, %zmm0, %zmm0
6404 ; CHECK: vaddpd %zmm0, %zmm2, %zmm0
6406 %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
6407 %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
6408 %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
6409 %res4 = fadd <8 x double> %res1, %res2
6410 %res5 = fadd <8 x double> %res3, %res4
6411 ret <8 x double> %res5
6414 declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)
6416 define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
6417 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
6418 ; CHECK: kmovw %edi, %k1
6419 ; CHECK: vshufi32x4 $0, %zmm0, %zmm0, %zmm2 {%k1} {z}
6420 ; CHECK: vshufi32x4 $0, %zmm0, %zmm0, %zmm1 {%k1}
6421 ; CHECK: vshufi32x4 $0, %zmm0, %zmm0, %zmm0
6422 ; CHECK: vpaddd %zmm1, %zmm0, %zmm0
6423 ; CHECK: vpaddd %zmm0, %zmm2, %zmm0
6425 %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
6426 %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
6427 %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
6428 %res4 = add <16 x i32> %res1, %res2
6429 %res5 = add <16 x i32> %res3, %res4
6430 ret <16 x i32> %res5
6433 declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)
6435 define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
6436 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
6437 ; CHECK: kmovw %eax, %k1
6438 ; CHECK: vshufi64x2 $68, %zmm0, %zmm0, %zmm2 {%k1} {z}
6439 ; CHECK: vshufi64x2 $68, %zmm0, %zmm0, %zmm1 {%k1}
6440 ; CHECK: vshufi64x2 $68, %zmm0, %zmm0, %zmm0
6441 ; CHECK: vpaddq %zmm1, %zmm0, %zmm0
6442 ; CHECK: vpaddq %zmm0, %zmm2, %zmm0
6444 %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
6445 %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
6446 %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
6447 %res4 = add <8 x i64> %res1, %res2
6448 %res5 = add <8 x i64> %res3, %res4
6452 declare <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64>, i8, <8 x i64>, i8)
6454 define <8 x i64>@test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i8 %x1, <8 x i64> %x2, i8 %x3) {
6455 ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
6457 ; CHECK-NEXT: movzbl %sil, %eax
6458 ; CHECK-NEXT: kmovw %eax, %k1
6459 ; CHECK-NEXT: vpsrlq $255, %zmm0, %zmm1 {%k1}
6460 ; CHECK-NEXT: vpsrlq $255, %zmm0, %zmm2 {%k1} {z}
6461 ; CHECK-NEXT: vpsrlq $255, %zmm0, %zmm0
6462 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6463 ; CHECK-NEXT: vpaddq %zmm2, %zmm0, %zmm0
6465 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i8 255, <8 x i64> %x2, i8 %x3)
6466 %res1 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i8 255, <8 x i64> %x2, i8 -1)
6467 %res2 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i8 255, <8 x i64> zeroinitializer, i8 %x3)
6468 %res3 = add <8 x i64> %res, %res1
6469 %res4 = add <8 x i64> %res3, %res2
6473 declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i8, <16 x i32>, i16)
6475 define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i8 %x1, <16 x i32> %x2, i16 %x3) {
6476 ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_512:
6478 ; CHECK-NEXT: kmovw %esi, %k1
6479 ; CHECK-NEXT: vpsrld $255, %zmm0, %zmm1 {%k1}
6480 ; CHECK-NEXT: vpsrld $255, %zmm0, %zmm2 {%k1} {z}
6481 ; CHECK-NEXT: vpsrld $255, %zmm0, %zmm0
6482 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6483 ; CHECK-NEXT: vpaddd %zmm2, %zmm0, %zmm0
6485 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i8 255, <16 x i32> %x2, i16 %x3)
6486 %res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i8 255, <16 x i32> %x2, i16 -1)
6487 %res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i8 255, <16 x i32> zeroinitializer, i16 %x3)
6488 %res3 = add <16 x i32> %res, %res1
6489 %res4 = add <16 x i32> %res3, %res2
6490 ret <16 x i32> %res4
6493 declare <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32>, i8, <16 x i32>, i16)
6495 define <16 x i32>@test_int_x86_avx512_mask_psra_di_512(<16 x i32> %x0, i8 %x1, <16 x i32> %x2, i16 %x3) {
6496 ; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_512:
6498 ; CHECK-NEXT: kmovw %esi, %k1
6499 ; CHECK-NEXT: vpsrad $3, %zmm0, %zmm1 {%k1}
6500 ; CHECK-NEXT: vpsrad $3, %zmm0, %zmm2 {%k1} {z}
6501 ; CHECK-NEXT: vpsrad $3, %zmm0, %zmm0
6502 ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
6503 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6505 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 %x3)
6506 %res1 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i8 3, <16 x i32> zeroinitializer, i16 %x3)
6507 %res2 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 -1)
6508 %res3 = add <16 x i32> %res, %res1
6509 %res4 = add <16 x i32> %res3, %res2
6510 ret <16 x i32> %res4
6513 declare <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64>, i8, <8 x i64>, i8)
6515 define <8 x i64>@test_int_x86_avx512_mask_psra_qi_512(<8 x i64> %x0, i8 %x1, <8 x i64> %x2, i8 %x3) {
6516 ; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_512:
6518 ; CHECK-NEXT: movzbl %sil, %eax
6519 ; CHECK-NEXT: kmovw %eax, %k1
6520 ; CHECK-NEXT: vpsraq $3, %zmm0, %zmm1 {%k1}
6521 ; CHECK-NEXT: vpsraq $3, %zmm0, %zmm2 {%k1} {z}
6522 ; CHECK-NEXT: vpsraq $3, %zmm0, %zmm0
6523 ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
6524 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6526 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 %x3)
6527 %res1 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i8 3, <8 x i64> zeroinitializer, i8 %x3)
6528 %res2 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 -1)
6529 %res3 = add <8 x i64> %res, %res1
6530 %res4 = add <8 x i64> %res3, %res2
6534 declare <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32>, i8, <16 x i32>, i16)
6536 define <16 x i32>@test_int_x86_avx512_mask_psll_di_512(<16 x i32> %x0, i8 %x1, <16 x i32> %x2, i16 %x3) {
6537 ; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_512:
6539 ; CHECK-NEXT: kmovw %esi, %k1
6540 ; CHECK-NEXT: vpslld $3, %zmm0, %zmm1 {%k1}
6541 ; CHECK-NEXT: vpslld $3, %zmm0, %zmm2 {%k1} {z}
6542 ; CHECK-NEXT: vpslld $3, %zmm0, %zmm0
6543 ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
6544 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6546 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 %x3)
6547 %res1 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i8 3, <16 x i32> zeroinitializer, i16 %x3)
6548 %res2 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 -1)
6549 %res3 = add <16 x i32> %res, %res1
6550 %res4 = add <16 x i32> %res3, %res2
6551 ret <16 x i32> %res4
6554 declare <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64>, i8, <8 x i64>, i8)
6556 define <8 x i64>@test_int_x86_avx512_mask_psll_qi_512(<8 x i64> %x0, i8 %x1, <8 x i64> %x2, i8 %x3) {
6557 ; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_512:
6559 ; CHECK-NEXT: movzbl %sil, %eax
6560 ; CHECK-NEXT: kmovw %eax, %k1
6561 ; CHECK-NEXT: vpsllq $3, %zmm0, %zmm1 {%k1}
6562 ; CHECK-NEXT: vpsllq $3, %zmm0, %zmm2 {%k1} {z}
6563 ; CHECK-NEXT: vpsllq $3, %zmm0, %zmm0
6564 ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
6565 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6567 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 %x3)
6568 %res1 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i8 3, <8 x i64> zeroinitializer, i8 %x3)
6569 %res2 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 -1)
6570 %res3 = add <8 x i64> %res, %res1
6571 %res4 = add <8 x i64> %res3, %res2
6575 declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i16, <16 x i32>, i8)
6577 define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i16 %x1, <16 x i32> %x2, i8 %x3) {
6578 ; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
6580 ; CHECK-NEXT: kmovw %esi, %k1
6581 ; CHECK-NEXT: vpshufd $3, %zmm0, %zmm1 {%k1}
6582 ; CHECK-NEXT: vpshufd $3, %zmm0, %zmm2 {%k1} {z}
6583 ; CHECK-NEXT: vpshufd $3, %zmm0, %zmm0
6584 ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
6585 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6587 %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i16 3, <16 x i32> %x2, i8 %x3)
6588 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i16 3, <16 x i32> zeroinitializer, i8 %x3)
6589 %res2 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i16 3, <16 x i32> %x2, i8 -1)
6590 %res3 = add <16 x i32> %res, %res1
6591 %res4 = add <16 x i32> %res3, %res2
6592 ret <16 x i32> %res4
6595 declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
6597 define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
6598 ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_512:
6600 ; CHECK-NEXT: kmovw %edi, %k1
6601 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1}
6602 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm3 {%k1} {z}
6603 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0
6604 ; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
6605 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6607 %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
6608 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
6609 %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
6610 %res3 = add <16 x i32> %res, %res1
6611 %res4 = add <16 x i32> %res3, %res2
6612 ret <16 x i32> %res4
6615 declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
6617 define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
6618 ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_512:
6620 ; CHECK-NEXT: movzbl %dil, %eax
6621 ; CHECK-NEXT: kmovw %eax, %k1
6622 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1}
6623 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm3 {%k1} {z}
6624 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0
6625 ; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
6626 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6628 %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
6629 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
6630 %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
6631 %res3 = add <8 x i64> %res, %res1
6632 %res4 = add <8 x i64> %res3, %res2
6636 declare <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32>, i8, <16 x i32>, i16)
6638 define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i8 %x1, <16 x i32> %x2, i16 %x3) {
6639 ; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_512:
6641 ; CHECK-NEXT: kmovw %esi, %k1
6642 ; CHECK-NEXT: vprold $3, %zmm0, %zmm1 {%k1}
6643 ; CHECK-NEXT: vprold $3, %zmm0, %zmm2 {%k1} {z}
6644 ; CHECK-NEXT: vprold $3, %zmm0, %zmm0
6645 ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
6646 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6648 %res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 %x3)
6649 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i8 3, <16 x i32> zeroinitializer, i16 %x3)
6650 %res2 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 -1)
6651 %res3 = add <16 x i32> %res, %res1
6652 %res4 = add <16 x i32> %res3, %res2
6653 ret <16 x i32> %res4
6656 declare <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64>, i8, <8 x i64>, i8)
6658 define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i8 %x1, <8 x i64> %x2, i8 %x3) {
6659 ; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_512:
6661 ; CHECK-NEXT: movzbl %sil, %eax
6662 ; CHECK-NEXT: kmovw %eax, %k1
6663 ; CHECK-NEXT: vprolq $3, %zmm0, %zmm1 {%k1}
6664 ; CHECK-NEXT: vprolq $3, %zmm0, %zmm2 {%k1} {z}
6665 ; CHECK-NEXT: vprolq $3, %zmm0, %zmm0
6666 ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
6667 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6669 %res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 %x3)
6670 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i8 3, <8 x i64> zeroinitializer, i8 %x3)
6671 %res2 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 -1)
6672 %res3 = add <8 x i64> %res, %res1
6673 %res4 = add <8 x i64> %res3, %res2
6677 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
6679 declare <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8>, <16 x i32>, i16)
6681 define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
6682 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
6684 ; CHECK-NEXT: kmovw %edi, %k1
6685 ; CHECK-NEXT: vpmovzxbd %xmm0, %zmm1 {%k1}
6686 ; CHECK-NEXT: vpmovzxbd %xmm0, %zmm2 {%k1} {z}
6687 ; CHECK-NEXT: vpmovzxbd %xmm0, %zmm0
6688 ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
6689 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6691 %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
6692 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
6693 %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
6694 %res3 = add <16 x i32> %res, %res1
6695 %res4 = add <16 x i32> %res3, %res2
6696 ret <16 x i32> %res4
6699 declare <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8>, <8 x i64>, i8)
6701 define <8 x i64>@test_int_x86_avx512_mask_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
6702 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
6704 ; CHECK-NEXT: movzbl %dil, %eax
6705 ; CHECK-NEXT: kmovw %eax, %k1
6706 ; CHECK-NEXT: vpmovzxbq %xmm0, %zmm1 {%k1}
6707 ; CHECK-NEXT: vpmovzxbq %xmm0, %zmm2 {%k1} {z}
6708 ; CHECK-NEXT: vpmovzxbq %xmm0, %zmm0
6709 ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
6710 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6712 %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
6713 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
6714 %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
6715 %res3 = add <8 x i64> %res, %res1
6716 %res4 = add <8 x i64> %res3, %res2
6720 declare <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32>, <8 x i64>, i8)
6722 define <8 x i64>@test_int_x86_avx512_mask_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
6723 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
6725 ; CHECK-NEXT: movzbl %dil, %eax
6726 ; CHECK-NEXT: kmovw %eax, %k1
6727 ; CHECK-NEXT: vpmovzxdq %ymm0, %zmm1 {%k1}
6728 ; CHECK-NEXT: vpmovzxdq %ymm0, %zmm2 {%k1} {z}
6729 ; CHECK-NEXT: vpmovzxdq %ymm0, %zmm0
6730 ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
6731 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6733 %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
6734 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
6735 %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
6736 %res3 = add <8 x i64> %res, %res1
6737 %res4 = add <8 x i64> %res3, %res2
6741 declare <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16>, <16 x i32>, i16)
6743 define <16 x i32>@test_int_x86_avx512_mask_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
6744 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
6746 ; CHECK-NEXT: kmovw %edi, %k1
6747 ; CHECK-NEXT: vpmovzxwd %ymm0, %zmm1 {%k1}
6748 ; CHECK-NEXT: vpmovzxwd %ymm0, %zmm2 {%k1} {z}
6749 ; CHECK-NEXT: vpmovzxwd %ymm0, %zmm0
6750 ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
6751 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6753 %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
6754 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
6755 %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
6756 %res3 = add <16 x i32> %res, %res1
6757 %res4 = add <16 x i32> %res3, %res2
6758 ret <16 x i32> %res4
6761 declare <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16>, <8 x i64>, i8)
6763 define <8 x i64>@test_int_x86_avx512_mask_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
6764 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
6766 ; CHECK-NEXT: movzbl %dil, %eax
6767 ; CHECK-NEXT: kmovw %eax, %k1
6768 ; CHECK-NEXT: vpmovzxwq %xmm0, %zmm1 {%k1}
6769 ; CHECK-NEXT: vpmovzxwq %xmm0, %zmm2 {%k1} {z}
6770 ; CHECK-NEXT: vpmovzxwq %xmm0, %zmm0
6771 ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
6772 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6774 %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
6775 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
6776 %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
6777 %res3 = add <8 x i64> %res, %res1
6778 %res4 = add <8 x i64> %res3, %res2