1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
4 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
5 define i32 @test_kortestz(i16 %a0, i16 %a1) {
6 ; CHECK-LABEL: test_kortestz:
8 ; CHECK-NEXT: kmovw %esi, %k0
9 ; CHECK-NEXT: kmovw %edi, %k1
10 ; CHECK-NEXT: kortestw %k0, %k1
11 ; CHECK-NEXT: sete %al
12 ; CHECK-NEXT: kmovw %eax, %k0
13 ; CHECK-NEXT: kmovw %k0, %eax
14 ; CHECK-NEXT: andl $1, %eax
16 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
20 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
21 define i32 @test_kortestc(i16 %a0, i16 %a1) {
22 ; CHECK-LABEL: test_kortestc:
24 ; CHECK-NEXT: kmovw %esi, %k0
25 ; CHECK-NEXT: kmovw %edi, %k1
26 ; CHECK-NEXT: kortestw %k0, %k1
27 ; CHECK-NEXT: sbbl %eax, %eax
28 ; CHECK-NEXT: andl $1, %eax
30 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
34 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
35 define i16 @test_kand(i16 %a0, i16 %a1) {
36 ; CHECK-LABEL: test_kand:
38 ; CHECK-NEXT: movw $8, %ax
39 ; CHECK-NEXT: kmovw %eax, %k0
40 ; CHECK-NEXT: kmovw %edi, %k1
41 ; CHECK-NEXT: kandw %k0, %k1, %k0
42 ; CHECK-NEXT: kmovw %esi, %k1
43 ; CHECK-NEXT: kandw %k1, %k0, %k0
44 ; CHECK-NEXT: kmovw %k0, %eax
46 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
47 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
51 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
52 define i16 @test_knot(i16 %a0) {
53 ; CHECK-LABEL: test_knot:
55 ; CHECK-NEXT: kmovw %edi, %k0
56 ; CHECK-NEXT: knotw %k0, %k0
57 ; CHECK-NEXT: kmovw %k0, %eax
59 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
63 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
65 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
66 ; CHECK-LABEL: unpckbw_test:
68 ; CHECK-NEXT: kmovw %edi, %k0
69 ; CHECK-NEXT: kmovw %esi, %k1
70 ; CHECK-NEXT: kunpckbw %k1, %k0, %k0
71 ; CHECK-NEXT: kmovw %k0, %eax
73 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
77 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
78 ; CHECK-LABEL: test_rcp_ps_512:
80 ; CHECK-NEXT: vrcp14ps %zmm0, %zmm0
82 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
85 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
87 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
88 ; CHECK-LABEL: test_rcp_pd_512:
90 ; CHECK-NEXT: vrcp14pd %zmm0, %zmm0
92 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
95 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
97 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
99 define <8 x double> @test7(<8 x double> %a) {
100 ; CHECK-LABEL: test7:
102 ; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
104 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
108 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
110 define <16 x float> @test8(<16 x float> %a) {
111 ; CHECK-LABEL: test8:
113 ; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
115 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
119 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
120 ; CHECK-LABEL: test_rsqrt_ps_512:
122 ; CHECK-NEXT: vrsqrt14ps %zmm0, %zmm0
124 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
125 ret <16 x float> %res
127 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
129 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
130 ; CHECK-LABEL: test_rsqrt14_ss:
132 ; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0
134 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
137 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
139 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
140 ; CHECK-LABEL: test_rcp14_ss:
142 ; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
144 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
147 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
149 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
150 ; CHECK-LABEL: test_sqrt_pd_512:
152 ; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
154 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
155 ret <8 x double> %res
157 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
159 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
160 ; CHECK-LABEL: test_sqrt_ps_512:
162 ; CHECK-NEXT: vsqrtps %zmm0, %zmm0
164 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
165 ret <16 x float> %res
167 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
168 ; CHECK-LABEL: test_sqrt_round_ps_512:
170 ; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0
172 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
173 ret <16 x float> %res
175 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
177 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
178 ; CHECK-LABEL: test_getexp_pd_512:
180 ; CHECK-NEXT: vgetexppd %zmm0, %zmm0
182 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
183 ret <8 x double> %res
185 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
186 ; CHECK-LABEL: test_getexp_round_pd_512:
188 ; CHECK-NEXT: vgetexppd {sae}, %zmm0, %zmm0
190 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
191 ret <8 x double> %res
193 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
195 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
196 ; CHECK-LABEL: test_getexp_ps_512:
198 ; CHECK-NEXT: vgetexpps %zmm0, %zmm0
200 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
201 ret <16 x float> %res
204 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
205 ; CHECK-LABEL: test_getexp_round_ps_512:
207 ; CHECK-NEXT: vgetexpps {sae}, %zmm0, %zmm0
209 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
210 ret <16 x float> %res
212 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
214 declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
216 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
217 ; CHECK-LABEL: test_sqrt_ss:
219 ; CHECK-NEXT: andl $1, %edi
220 ; CHECK-NEXT: kmovw %edi, %k1
221 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
222 ; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
223 ; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
224 ; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
225 ; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
226 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
227 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
228 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
230 %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
231 %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
232 %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
233 %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)
235 %res.1 = fadd <4 x float> %res0, %res1
236 %res.2 = fadd <4 x float> %res2, %res3
237 %res = fadd <4 x float> %res.1, %res.2
241 declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
243 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
244 ; CHECK-LABEL: test_sqrt_sd:
246 ; CHECK-NEXT: andl $1, %edi
247 ; CHECK-NEXT: kmovw %edi, %k1
248 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
249 ; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
250 ; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
251 ; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
252 ; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
253 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
254 ; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0
255 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
257 %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
258 %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
259 %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
260 %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)
262 %res.1 = fadd <2 x double> %res0, %res1
263 %res.2 = fadd <2 x double> %res2, %res3
264 %res = fadd <2 x double> %res.1, %res.2
265 ret <2 x double> %res
268 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
269 ; CHECK-LABEL: test_x86_sse2_cvtsd2si64:
271 ; CHECK-NEXT: vcvtsd2si %xmm0, %rax
273 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
276 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
278 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
279 ; CHECK-LABEL: test_x86_sse2_cvtsi642sd:
281 ; CHECK-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
283 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
284 ret <2 x double> %res
286 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
288 define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
289 ; CHECK-LABEL: test_x86_avx512_cvttsd2si64:
291 ; CHECK-NEXT: vcvttsd2si %xmm0, %rcx
292 ; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %rax
293 ; CHECK-NEXT: addq %rcx, %rax
295 %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
296 %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
297 %res2 = add i64 %res0, %res1
300 declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
302 define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
303 ; CHECK-LABEL: test_x86_avx512_cvttsd2usi:
305 ; CHECK-NEXT: vcvttsd2usi %xmm0, %ecx
306 ; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %eax
307 ; CHECK-NEXT: addl %ecx, %eax
309 %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
310 %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
311 %res2 = add i32 %res0, %res1
314 declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
316 define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
317 ; CHECK-LABEL: test_x86_avx512_cvttsd2si:
319 ; CHECK-NEXT: vcvttsd2si %xmm0, %ecx
320 ; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %eax
321 ; CHECK-NEXT: addl %ecx, %eax
323 %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
324 %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
325 %res2 = add i32 %res0, %res1
328 declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
332 define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
333 ; CHECK-LABEL: test_x86_avx512_cvttsd2usi64:
335 ; CHECK-NEXT: vcvttsd2usi %xmm0, %rcx
336 ; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %rax
337 ; CHECK-NEXT: addq %rcx, %rax
339 %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
340 %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
341 %res2 = add i64 %res0, %res1
344 declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
346 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
347 ; CHECK-LABEL: test_x86_sse_cvtss2si64:
349 ; CHECK-NEXT: vcvtss2si %xmm0, %rax
351 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
354 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
357 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
358 ; CHECK-LABEL: test_x86_sse_cvtsi642ss:
360 ; CHECK-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
362 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
365 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
368 define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
369 ; CHECK-LABEL: test_x86_avx512_cvttss2si:
371 ; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %ecx
372 ; CHECK-NEXT: vcvttss2si %xmm0, %eax
373 ; CHECK-NEXT: addl %ecx, %eax
375 %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
376 %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
377 %res2 = add i32 %res0, %res1
380 declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
382 define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
383 ; CHECK-LABEL: test_x86_avx512_cvttss2si64:
385 ; CHECK-NEXT: vcvttss2si %xmm0, %rcx
386 ; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %rax
387 ; CHECK-NEXT: addq %rcx, %rax
389 %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
390 %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
391 %res2 = add i64 %res0, %res1
394 declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
396 define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
397 ; CHECK-LABEL: test_x86_avx512_cvttss2usi:
399 ; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %ecx
400 ; CHECK-NEXT: vcvttss2usi %xmm0, %eax
401 ; CHECK-NEXT: addl %ecx, %eax
403 %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
404 %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
405 %res2 = add i32 %res0, %res1
408 declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
410 define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
411 ; CHECK-LABEL: test_x86_avx512_cvttss2usi64:
413 ; CHECK-NEXT: vcvttss2usi %xmm0, %rcx
414 ; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %rax
415 ; CHECK-NEXT: addq %rcx, %rax
417 %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
418 %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
419 %res2 = add i64 %res0, %res1
422 declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
424 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
425 ; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
427 ; CHECK-NEXT: vcvtsd2usi %xmm0, %rax
429 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
432 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
434 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
435 ; CHECK-LABEL: test_x86_vcvtph2ps_512:
437 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
439 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
440 ret <16 x float> %res
443 define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
444 ; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
446 ; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0
448 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
449 ret <16 x float> %res
452 define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
453 ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk:
455 ; CHECK-NEXT: kmovw %edi, %k1
456 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1}
457 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
459 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
460 ret <16 x float> %res
463 define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
464 ; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
466 ; CHECK-NEXT: kmovw %edi, %k1
467 ; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
469 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
470 ret <16 x float> %res
473 define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
474 ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz:
476 ; CHECK-NEXT: kmovw %edi, %k1
477 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
479 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
480 ret <16 x float> %res
483 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
486 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
487 ; CHECK-LABEL: test_x86_vcvtps2ph_256:
489 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm0
491 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
495 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
497 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
498 ; CHECK-LABEL: test_x86_vbroadcast_ss_512:
500 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0
502 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
503 ret <16 x float> %res
505 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
507 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
508 ; CHECK-LABEL: test_x86_vbroadcast_sd_512:
510 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
512 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
513 ret <8 x double> %res
515 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
517 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1, i16 %mask ) {
518 ; CHECK-LABEL: test_x86_vbroadcast_ss_ps_512:
519 ; CHECK: kmovw %edi, %k1
520 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
521 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm2 {%k1} {z}
522 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
523 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
525 %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 -1)
526 %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> %a1, i16 %mask)
527 %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 %mask)
528 %res3 = fadd <16 x float> %res, %res1
529 %res4 = fadd <16 x float> %res2, %res3
530 ret <16 x float> %res4
532 declare <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float>, <16 x float>, i16) nounwind readonly
535 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1, i8 %mask ) {
536 ; CHECK-LABEL: test_x86_vbroadcast_sd_pd_512:
537 ; CHECK: kmovw %eax, %k1
538 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
539 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm2 {%k1} {z}
540 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
541 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
543 %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 -1)
544 %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> %a1, i8 %mask)
545 %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 %mask)
546 %res3 = fadd <8 x double> %res, %res1
547 %res4 = fadd <8 x double> %res2, %res3
548 ret <8 x double> %res4
550 declare <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double>, <8 x double>, i8) nounwind readonly
552 define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
553 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
555 ; CHECK-NEXT: kmovw %edi, %k1
556 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm1 {%k1}
557 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm2 {%k1} {z}
558 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
559 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
560 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
562 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
563 %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
564 %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
565 %res3 = add <16 x i32> %res, %res1
566 %res4 = add <16 x i32> %res2, %res3
569 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)
571 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
572 ; CHECK-LABEL: test_x86_pbroadcastd_i32_512:
574 ; CHECK-NEXT: vpbroadcastd %edi, %zmm0
576 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
579 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
581 define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
582 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
584 ; CHECK-NEXT: movzbl %dil, %eax
585 ; CHECK-NEXT: kmovw %eax, %k1
586 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm1 {%k1}
587 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm2 {%k1} {z}
588 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm0
589 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
590 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
592 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
593 %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
594 %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
595 %res3 = add <8 x i64> %res, %res1
596 %res4 = add <8 x i64> %res2, %res3
599 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)
601 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
602 ; CHECK-LABEL: test_x86_pbroadcastq_i64_512:
604 ; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
606 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
609 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
611 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
612 ; CHECK-LABEL: test_conflict_d:
614 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0
616 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
620 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
622 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
623 ; CHECK-LABEL: test_conflict_q:
625 ; CHECK-NEXT: vpconflictq %zmm0, %zmm0
627 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
631 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
633 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
634 ; CHECK-LABEL: test_maskz_conflict_d:
636 ; CHECK-NEXT: kmovw %edi, %k1
637 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
639 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
643 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
644 ; CHECK-LABEL: test_mask_conflict_q:
646 ; CHECK-NEXT: movzbl %dil, %eax
647 ; CHECK-NEXT: kmovw %eax, %k1
648 ; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
649 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
651 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
655 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
656 ; CHECK-LABEL: test_lzcnt_d:
658 ; CHECK-NEXT: vplzcntd %zmm0, %zmm0
660 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
664 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
666 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
667 ; CHECK-LABEL: test_lzcnt_q:
669 ; CHECK-NEXT: vplzcntq %zmm0, %zmm0
671 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
675 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
678 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
679 ; CHECK-LABEL: test_mask_lzcnt_d:
681 ; CHECK-NEXT: kmovw %edi, %k1
682 ; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1}
683 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
685 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
689 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
690 ; CHECK-LABEL: test_mask_lzcnt_q:
692 ; CHECK-NEXT: movzbl %dil, %eax
693 ; CHECK-NEXT: kmovw %eax, %k1
694 ; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1}
695 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
697 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
701 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
702 ; CHECK-LABEL: test_x86_mask_blend_ps_512:
704 ; CHECK-NEXT: kmovw %edi, %k1
705 ; CHECK-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
707 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
708 ret <16 x float> %res
711 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
713 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
714 ; CHECK-LABEL: test_x86_mask_blend_pd_512:
716 ; CHECK-NEXT: movzbl %dil, %eax
717 ; CHECK-NEXT: kmovw %eax, %k1
718 ; CHECK-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
720 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
721 ret <8 x double> %res
724 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
725 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop:
727 ; CHECK-NEXT: movzbl %sil, %eax
728 ; CHECK-NEXT: kmovw %eax, %k1
729 ; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1}
731 %b = load <8 x double>, <8 x double>* %ptr
732 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
733 ret <8 x double> %res
735 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
737 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
738 ; CHECK-LABEL: test_x86_mask_blend_d_512:
740 ; CHECK-NEXT: kmovw %edi, %k1
741 ; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm0 {%k1}
743 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
746 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
748 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
749 ; CHECK-LABEL: test_x86_mask_blend_q_512:
751 ; CHECK-NEXT: movzbl %dil, %eax
752 ; CHECK-NEXT: kmovw %eax, %k1
753 ; CHECK-NEXT: vpblendmq %zmm1, %zmm0, %zmm0 {%k1}
755 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
758 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
760 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
761 ; CHECK-LABEL: test_cmpps:
763 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
764 ; CHECK-NEXT: kmovw %k0, %eax
766 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
769 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
771 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
772 ; CHECK-LABEL: test_cmppd:
774 ; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0
775 ; CHECK-NEXT: kmovw %k0, %eax
777 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
780 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
783 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
784 ; CHECK-LABEL: test_vmaxpd:
786 ; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
788 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
789 <8 x double>zeroinitializer, i8 -1, i32 4)
790 ret <8 x double> %res
792 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
793 <8 x double>, i8, i32)
795 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
796 ; CHECK-LABEL: test_vminpd:
798 ; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0
800 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
801 <8 x double>zeroinitializer, i8 -1, i32 4)
802 ret <8 x double> %res
804 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
805 <8 x double>, i8, i32)
807 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
809 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
810 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_512:
812 ; CHECK-NEXT: kmovw %edi, %k1
813 ; CHECK-NEXT: vpabsd %zmm0, %zmm1 {%k1}
814 ; CHECK-NEXT: vpabsd %zmm0, %zmm0
815 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
817 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
818 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
819 %res2 = add <16 x i32> %res, %res1
823 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
825 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
826 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_512:
828 ; CHECK-NEXT: movzbl %dil, %eax
829 ; CHECK-NEXT: kmovw %eax, %k1
830 ; CHECK-NEXT: vpabsq %zmm0, %zmm1 {%k1}
831 ; CHECK-NEXT: vpabsq %zmm0, %zmm0
832 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
834 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
835 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
836 %res2 = add <8 x i64> %res, %res1
; ptestm tests: the intrinsic lowers to a single vptestm{q,d} producing a
; k-register mask, read back with kmovw. Both calls use an all-ones mask (-1).
840 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
841 ; CHECK-LABEL: test_vptestmq:
843 ; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0
844 ; CHECK-NEXT: kmovw %k0, %eax
846 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
849 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
; 32-bit element variant returns a full 16-bit mask.
851 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
852 ; CHECK-LABEL: test_vptestmd:
854 ; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0
855 ; CHECK-NEXT: kmovw %k0, %eax
857 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
860 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
; Masked-store tests: the storeu.* intrinsics lower to unaligned (vmovups /
; vmovupd) masked stores, the store.* intrinsics to aligned (vmovaps / vmovapd)
; masked stores. The GPR mask always goes through kmovw into %k1.
862 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
863 ; CHECK-LABEL: test_store1:
865 ; CHECK-NEXT: kmovw %esi, %k1
866 ; CHECK-NEXT: vmovups %zmm0, (%rdi) {%k1}
868 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
872 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
874 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
875 ; CHECK-LABEL: test_store2:
877 ; CHECK-NEXT: kmovw %esi, %k1
878 ; CHECK-NEXT: vmovupd %zmm0, (%rdi) {%k1}
880 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
884 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
; Aligned variants below.
886 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
887 ; CHECK-LABEL: test_mask_store_aligned_ps:
889 ; CHECK-NEXT: kmovw %esi, %k1
890 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
892 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
896 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
898 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
899 ; CHECK-LABEL: test_mask_store_aligned_pd:
901 ; CHECK-NEXT: kmovw %esi, %k1
902 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
904 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
908 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
; Aligned masked-load tests. A zeroinitializer passthru selects the {z}
; (zero-masking) form; an all-ones mask (-1) folds to a plain vmova* load.
910 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
911 ; CHECK-LABEL: test_maskz_load_aligned_ps:
913 ; CHECK-NEXT: kmovw %esi, %k1
914 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
916 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
917 ret <16 x float> %res
920 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
922 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
923 ; CHECK-LABEL: test_maskz_load_aligned_pd:
925 ; CHECK-NEXT: kmovw %esi, %k1
926 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
928 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
929 ret <8 x double> %res
932 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
; All-ones mask: no k-register setup, unmasked load expected.
934 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
935 ; CHECK-LABEL: test_load_aligned_ps:
937 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
939 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
940 ret <16 x float> %res
943 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
944 ; CHECK-LABEL: test_load_aligned_pd:
946 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
948 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
949 ret <8 x double> %res
952 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
; valign tests: unmasked, merge-masked (into %src, then vmovaps to zmm0) and
; zero-masked ({z}) forms of valignq/valignd with immediate shift counts.
954 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
955 ; CHECK-LABEL: test_valign_q:
957 ; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm0
959 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
963 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
964 ; CHECK-LABEL: test_mask_valign_q:
966 ; CHECK-NEXT: movzbl %dil, %eax
967 ; CHECK-NEXT: kmovw %eax, %k1
968 ; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
969 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
971 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
975 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
977 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
978 ; CHECK-LABEL: test_maskz_valign_d:
980 ; CHECK-NEXT: kmovw %edi, %k1
981 ; CHECK-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z}
983 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
987 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
; Masked scalar store: lowers to vmovss with a k-register write-mask.
989 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
990 ; CHECK-LABEL: test_mask_store_ss:
992 ; CHECK-NEXT: kmovw %esi, %k1
993 ; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1}
995 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
999 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
; pcmpeq / pcmpgt tests, unmasked and masked. i16 masks reach the k-register
; via kmovw directly; i8 masks are first zero-extended with movzbl.
1001 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
1002 ; CHECK-LABEL: test_pcmpeq_d:
1004 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1005 ; CHECK-NEXT: kmovw %k0, %eax
1007 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
1011 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1012 ; CHECK-LABEL: test_mask_pcmpeq_d:
1014 ; CHECK-NEXT: kmovw %edi, %k1
1015 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1016 ; CHECK-NEXT: kmovw %k0, %eax
1018 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
1022 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
1024 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
1025 ; CHECK-LABEL: test_pcmpeq_q:
1027 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
1028 ; CHECK-NEXT: kmovw %k0, %eax
1030 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
1034 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1035 ; CHECK-LABEL: test_mask_pcmpeq_q:
1037 ; CHECK-NEXT: movzbl %dil, %eax
1038 ; CHECK-NEXT: kmovw %eax, %k1
1039 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
1040 ; CHECK-NEXT: kmovw %k0, %eax
1042 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
1046 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
; Signed greater-than variants follow the same shapes.
1048 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
1049 ; CHECK-LABEL: test_pcmpgt_d:
1051 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
1052 ; CHECK-NEXT: kmovw %k0, %eax
1054 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
1058 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1059 ; CHECK-LABEL: test_mask_pcmpgt_d:
1061 ; CHECK-NEXT: kmovw %edi, %k1
1062 ; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
1063 ; CHECK-NEXT: kmovw %k0, %eax
1065 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
1069 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
1071 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
1072 ; CHECK-LABEL: test_pcmpgt_q:
1074 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
1075 ; CHECK-NEXT: kmovw %k0, %eax
1077 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
1081 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1082 ; CHECK-LABEL: test_mask_pcmpgt_q:
1084 ; CHECK-NEXT: movzbl %dil, %eax
1085 ; CHECK-NEXT: kmovw %eax, %k1
1086 ; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
1087 ; CHECK-NEXT: kmovw %k0, %eax
1089 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
1093 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
; Exercises llvm.x86.avx512.mask.cmp.d.512 with all 8 immediate predicates
; (0..7: eq, lt, le, unord, neq, nlt, nle, ord as spelled by the vpcmp*d
; mnemonics below); each i16 mask result is inserted into one <8 x i16> lane.
1095 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
1096 ; CHECK-LABEL: test_cmp_d_512:
1098 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
1099 ; CHECK-NEXT: kmovw %k0, %r8d
1100 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0
1101 ; CHECK-NEXT: kmovw %k0, %r9d
1102 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0
1103 ; CHECK-NEXT: kmovw %k0, %r10d
1104 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0
1105 ; CHECK-NEXT: kmovw %k0, %esi
1106 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
1107 ; CHECK-NEXT: kmovw %k0, %edi
1108 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
1109 ; CHECK-NEXT: kmovw %k0, %eax
1110 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0
1111 ; CHECK-NEXT: kmovw %k0, %ecx
1112 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0
1113 ; CHECK-NEXT: kmovw %k0, %edx
1114 ; CHECK-NEXT: vmovd %r8d, %xmm0
1115 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1116 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1117 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1118 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1119 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1120 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1121 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1123 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
1124 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1125 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
1126 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1127 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
1128 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1129 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
1130 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1131 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
1132 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1133 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
1134 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1135 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
1136 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1137 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
1138 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
; Same 8 predicates, but every compare is write-masked by %mask ({%k1}).
1142 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1143 ; CHECK-LABEL: test_mask_cmp_d_512:
1145 ; CHECK-NEXT: kmovw %edi, %k1
1146 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
1147 ; CHECK-NEXT: kmovw %k0, %r8d
1148 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0 {%k1}
1149 ; CHECK-NEXT: kmovw %k0, %r9d
1150 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0 {%k1}
1151 ; CHECK-NEXT: kmovw %k0, %r10d
1152 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0 {%k1}
1153 ; CHECK-NEXT: kmovw %k0, %esi
1154 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 {%k1}
1155 ; CHECK-NEXT: kmovw %k0, %edi
1156 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
1157 ; CHECK-NEXT: kmovw %k0, %eax
1158 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0 {%k1}
1159 ; CHECK-NEXT: kmovw %k0, %ecx
1160 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0 {%k1}
1161 ; CHECK-NEXT: kmovw %k0, %edx
1162 ; CHECK-NEXT: vmovd %r8d, %xmm0
1163 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1164 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1165 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1166 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1167 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1168 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1169 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1171 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
1172 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1173 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
1174 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1175 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
1176 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1177 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
1178 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1179 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
1180 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1181 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
1182 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1183 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
1184 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1185 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
1186 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
1190 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
; Unsigned counterpart of the cmp.d tests above: ucmp.d.512 with immediates
; 0..7 lowers to the vpcmp*ud (unsigned) mnemonics; otherwise identical shape.
1192 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
1193 ; CHECK-LABEL: test_ucmp_d_512:
1195 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0
1196 ; CHECK-NEXT: kmovw %k0, %r8d
1197 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0
1198 ; CHECK-NEXT: kmovw %k0, %r9d
1199 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0
1200 ; CHECK-NEXT: kmovw %k0, %r10d
1201 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0
1202 ; CHECK-NEXT: kmovw %k0, %esi
1203 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0
1204 ; CHECK-NEXT: kmovw %k0, %edi
1205 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0
1206 ; CHECK-NEXT: kmovw %k0, %eax
1207 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
1208 ; CHECK-NEXT: kmovw %k0, %ecx
1209 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0
1210 ; CHECK-NEXT: kmovw %k0, %edx
1211 ; CHECK-NEXT: vmovd %r8d, %xmm0
1212 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1213 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1214 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1215 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1216 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1217 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1218 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1220 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
1221 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1222 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
1223 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1224 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
1225 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1226 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
1227 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1228 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
1229 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1230 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
1231 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1232 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
1233 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1234 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
1235 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
; Masked variant: every unsigned compare carries the {%k1} write-mask.
1239 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1240 ; CHECK-LABEL: test_mask_ucmp_d_512:
1242 ; CHECK-NEXT: kmovw %edi, %k1
1243 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 {%k1}
1244 ; CHECK-NEXT: kmovw %k0, %r8d
1245 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
1246 ; CHECK-NEXT: kmovw %k0, %r9d
1247 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0 {%k1}
1248 ; CHECK-NEXT: kmovw %k0, %r10d
1249 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0 {%k1}
1250 ; CHECK-NEXT: kmovw %k0, %esi
1251 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0 {%k1}
1252 ; CHECK-NEXT: kmovw %k0, %edi
1253 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0 {%k1}
1254 ; CHECK-NEXT: kmovw %k0, %eax
1255 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 {%k1}
1256 ; CHECK-NEXT: kmovw %k0, %ecx
1257 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0 {%k1}
1258 ; CHECK-NEXT: kmovw %k0, %edx
1259 ; CHECK-NEXT: vmovd %r8d, %xmm0
1260 ; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
1261 ; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
1262 ; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
1263 ; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
1264 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1265 ; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1266 ; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
1268 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
1269 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1270 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
1271 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1272 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
1273 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1274 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
1275 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1276 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
1277 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1278 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
1279 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1280 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
1281 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1282 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
1283 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
1287 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
; 64-bit element compare tests: cmp.q.512 with all 8 predicates. The eight i8
; mask results are zero-extended (movzbl) and inserted into even byte lanes
; of an <8 x i8> result via vpinsrb.
1289 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
1290 ; CHECK-LABEL: test_cmp_q_512:
1292 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
1293 ; CHECK-NEXT: kmovw %k0, %r8d
1294 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0
1295 ; CHECK-NEXT: kmovw %k0, %r9d
1296 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0
1297 ; CHECK-NEXT: kmovw %k0, %r10d
1298 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0
1299 ; CHECK-NEXT: kmovw %k0, %r11d
1300 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0
1301 ; CHECK-NEXT: kmovw %k0, %edi
1302 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
1303 ; CHECK-NEXT: kmovw %k0, %eax
1304 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0
1305 ; CHECK-NEXT: kmovw %k0, %ecx
1306 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0
1307 ; CHECK-NEXT: kmovw %k0, %edx
1308 ; CHECK-NEXT: movzbl %r8b, %esi
1309 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1310 ; CHECK-NEXT: movzbl %r9b, %esi
1311 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1312 ; CHECK-NEXT: movzbl %r10b, %esi
1313 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1314 ; CHECK-NEXT: movzbl %r11b, %esi
1315 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1316 ; CHECK-NEXT: movzbl %dil, %esi
1317 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1318 ; CHECK-NEXT: movzbl %al, %eax
1319 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1320 ; CHECK-NEXT: movzbl %cl, %eax
1321 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1322 ; CHECK-NEXT: movzbl %dl, %eax
1323 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1325 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
1326 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1327 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
1328 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1329 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
1330 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1331 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
1332 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1333 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
1334 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1335 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
1336 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1337 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
1338 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1339 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
1340 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
; Masked variant: i8 mask is zero-extended into %k1, applied to each compare.
1344 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1345 ; CHECK-LABEL: test_mask_cmp_q_512:
1347 ; CHECK-NEXT: movzbl %dil, %eax
1348 ; CHECK-NEXT: kmovw %eax, %k1
1349 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
1350 ; CHECK-NEXT: kmovw %k0, %r8d
1351 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0 {%k1}
1352 ; CHECK-NEXT: kmovw %k0, %r9d
1353 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0 {%k1}
1354 ; CHECK-NEXT: kmovw %k0, %r10d
1355 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0 {%k1}
1356 ; CHECK-NEXT: kmovw %k0, %r11d
1357 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1}
1358 ; CHECK-NEXT: kmovw %k0, %edi
1359 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
1360 ; CHECK-NEXT: kmovw %k0, %eax
1361 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0 {%k1}
1362 ; CHECK-NEXT: kmovw %k0, %ecx
1363 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0 {%k1}
1364 ; CHECK-NEXT: kmovw %k0, %edx
1365 ; CHECK-NEXT: movzbl %r8b, %esi
1366 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1367 ; CHECK-NEXT: movzbl %r9b, %esi
1368 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1369 ; CHECK-NEXT: movzbl %r10b, %esi
1370 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1371 ; CHECK-NEXT: movzbl %r11b, %esi
1372 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1373 ; CHECK-NEXT: movzbl %dil, %esi
1374 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1375 ; CHECK-NEXT: movzbl %al, %eax
1376 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1377 ; CHECK-NEXT: movzbl %cl, %eax
1378 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1379 ; CHECK-NEXT: movzbl %dl, %eax
1380 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1382 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
1383 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1384 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
1385 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1386 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
1387 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1388 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
1389 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1390 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
1391 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1392 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1393 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1394 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1395 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1396 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1397 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1401 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
; Unsigned 64-bit compare tests: ucmp.q.512 with all 8 predicates, lowering
; to the vpcmp*uq mnemonics; result assembly matches the cmp.q tests above.
1403 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
1404 ; CHECK-LABEL: test_ucmp_q_512:
1406 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0
1407 ; CHECK-NEXT: kmovw %k0, %r8d
1408 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
1409 ; CHECK-NEXT: kmovw %k0, %r9d
1410 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0
1411 ; CHECK-NEXT: kmovw %k0, %r10d
1412 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0
1413 ; CHECK-NEXT: kmovw %k0, %r11d
1414 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0
1415 ; CHECK-NEXT: kmovw %k0, %edi
1416 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0
1417 ; CHECK-NEXT: kmovw %k0, %eax
1418 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0
1419 ; CHECK-NEXT: kmovw %k0, %ecx
1420 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0
1421 ; CHECK-NEXT: kmovw %k0, %edx
1422 ; CHECK-NEXT: movzbl %r8b, %esi
1423 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1424 ; CHECK-NEXT: movzbl %r9b, %esi
1425 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1426 ; CHECK-NEXT: movzbl %r10b, %esi
1427 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1428 ; CHECK-NEXT: movzbl %r11b, %esi
1429 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1430 ; CHECK-NEXT: movzbl %dil, %esi
1431 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1432 ; CHECK-NEXT: movzbl %al, %eax
1433 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1434 ; CHECK-NEXT: movzbl %cl, %eax
1435 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1436 ; CHECK-NEXT: movzbl %dl, %eax
1437 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1439 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
1440 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1441 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
1442 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1443 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
1444 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1445 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
1446 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1447 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
1448 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1449 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
1450 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1451 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
1452 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1453 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
1454 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
; Masked variant of the unsigned 64-bit compares.
1458 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1459 ; CHECK-LABEL: test_mask_ucmp_q_512:
1461 ; CHECK-NEXT: movzbl %dil, %eax
1462 ; CHECK-NEXT: kmovw %eax, %k1
1463 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 {%k1}
1464 ; CHECK-NEXT: kmovw %k0, %r8d
1465 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
1466 ; CHECK-NEXT: kmovw %k0, %r9d
1467 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 {%k1}
1468 ; CHECK-NEXT: kmovw %k0, %r10d
1469 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1}
1470 ; CHECK-NEXT: kmovw %k0, %r11d
1471 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0 {%k1}
1472 ; CHECK-NEXT: kmovw %k0, %edi
1473 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1}
1474 ; CHECK-NEXT: kmovw %k0, %eax
1475 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1}
1476 ; CHECK-NEXT: kmovw %k0, %ecx
1477 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0 {%k1}
1478 ; CHECK-NEXT: kmovw %k0, %edx
1479 ; CHECK-NEXT: movzbl %r8b, %esi
1480 ; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
1481 ; CHECK-NEXT: movzbl %r9b, %esi
1482 ; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
1483 ; CHECK-NEXT: movzbl %r10b, %esi
1484 ; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1485 ; CHECK-NEXT: movzbl %r11b, %esi
1486 ; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
1487 ; CHECK-NEXT: movzbl %dil, %esi
1488 ; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
1489 ; CHECK-NEXT: movzbl %al, %eax
1490 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1491 ; CHECK-NEXT: movzbl %cl, %eax
1492 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1493 ; CHECK-NEXT: movzbl %dl, %eax
1494 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1496 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
1497 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1498 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
1499 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1500 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
1501 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1502 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
1503 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1504 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
1505 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1506 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1507 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1508 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1509 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1510 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1511 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1515 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
; vextract tests: masked merge, masked zeroing, and unmasked (all-ones mask)
; forms of vextractf32x4 / vextracti64x4 / vextracti32x4 / vextractf64x4,
; each with immediate lane index 2.
1517 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
1518 ; CHECK-LABEL: test_mask_vextractf32x4:
1520 ; CHECK-NEXT: kmovw %edi, %k1
1521 ; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
1523 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
1524 ret <4 x float> %res
1527 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)
1529 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
1530 ; CHECK-LABEL: test_mask_vextracti64x4:
1532 ; CHECK-NEXT: kmovw %edi, %k1
1533 ; CHECK-NEXT: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
1535 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask)
1539 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)
1541 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
1542 ; CHECK-LABEL: test_maskz_vextracti32x4:
1544 ; CHECK-NEXT: kmovw %edi, %k1
1545 ; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
1547 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
1551 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)
1553 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
1554 ; CHECK-LABEL: test_vextractf64x4:
1556 ; CHECK-NEXT: vextractf64x4 $2, %zmm0, %ymm0
1558 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1)
1559 ret <4 x double> %res
1562 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
; Immediate shift-left tests (pslli.d / pslli.q) in three flavors each:
; unmasked (all-ones mask), merge-masked into %a1 (result copied back to
; zmm0 via vmovaps), and zero-masked ({z}).
1564 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
1565 ; CHECK-LABEL: test_x86_avx512_pslli_d:
1567 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0
1569 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1573 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1574 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d:
1576 ; CHECK-NEXT: kmovw %edi, %k1
1577 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm1 {%k1}
1578 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1580 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1584 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
1585 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d:
1587 ; CHECK-NEXT: kmovw %edi, %k1
1588 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z}
1590 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1594 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
; 64-bit element variant; the i8 mask goes through movzbl first.
1596 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1597 ; CHECK-LABEL: test_x86_avx512_pslli_q:
1599 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0
1601 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1605 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1606 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q:
1608 ; CHECK-NEXT: movzbl %dil, %eax
1609 ; CHECK-NEXT: kmovw %eax, %k1
1610 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1}
1611 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1613 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1617 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1618 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q:
1620 ; CHECK-NEXT: movzbl %dil, %eax
1621 ; CHECK-NEXT: kmovw %eax, %k1
1622 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
1624 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1628 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1630 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1631 ; CHECK-LABEL: test_x86_avx512_psrli_d:
1633 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0
1635 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1639 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1640 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d:
1642 ; CHECK-NEXT: kmovw %edi, %k1
1643 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1}
1644 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1646 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1650 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1651 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d:
1653 ; CHECK-NEXT: kmovw %edi, %k1
1654 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1656 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1660 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1662 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1663 ; CHECK-LABEL: test_x86_avx512_psrli_q:
1665 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0
1667 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1671 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1672 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q:
1674 ; CHECK-NEXT: movzbl %dil, %eax
1675 ; CHECK-NEXT: kmovw %eax, %k1
1676 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1}
1677 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1679 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1683 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1684 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q:
1686 ; CHECK-NEXT: movzbl %dil, %eax
1687 ; CHECK-NEXT: kmovw %eax, %k1
1688 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1690 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1694 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1696 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1697 ; CHECK-LABEL: test_x86_avx512_psrai_d:
1699 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0
1701 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1705 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1706 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d:
1708 ; CHECK-NEXT: kmovw %edi, %k1
1709 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1}
1710 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1712 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1716 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1717 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d:
1719 ; CHECK-NEXT: kmovw %edi, %k1
1720 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1722 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1726 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1728 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1729 ; CHECK-LABEL: test_x86_avx512_psrai_q:
1731 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0
1733 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1737 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1738 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q:
1740 ; CHECK-NEXT: movzbl %dil, %eax
1741 ; CHECK-NEXT: kmovw %eax, %k1
1742 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1}
1743 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1745 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1749 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1750 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q:
1752 ; CHECK-NEXT: movzbl %dil, %eax
1753 ; CHECK-NEXT: kmovw %eax, %k1
1754 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1756 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1760 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1762 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1763 ; CHECK-LABEL: test_x86_avx512_psll_d:
1765 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0
1767 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1771 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1772 ; CHECK-LABEL: test_x86_avx512_mask_psll_d:
1774 ; CHECK-NEXT: kmovw %edi, %k1
1775 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1776 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1778 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1782 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1783 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d:
1785 ; CHECK-NEXT: kmovw %edi, %k1
1786 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1788 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1792 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1794 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1795 ; CHECK-LABEL: test_x86_avx512_psll_q:
1797 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0
1799 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1803 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1804 ; CHECK-LABEL: test_x86_avx512_mask_psll_q:
1806 ; CHECK-NEXT: movzbl %dil, %eax
1807 ; CHECK-NEXT: kmovw %eax, %k1
1808 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1809 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1811 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1815 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1816 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q:
1818 ; CHECK-NEXT: movzbl %dil, %eax
1819 ; CHECK-NEXT: kmovw %eax, %k1
1820 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1822 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1826 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1828 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1829 ; CHECK-LABEL: test_x86_avx512_psrl_d:
1831 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0
1833 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1837 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1838 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d:
1840 ; CHECK-NEXT: kmovw %edi, %k1
1841 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1842 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1844 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1848 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1849 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d:
1851 ; CHECK-NEXT: kmovw %edi, %k1
1852 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1854 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1858 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1860 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1861 ; CHECK-LABEL: test_x86_avx512_psrl_q:
1863 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
1865 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1869 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1870 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q:
1872 ; CHECK-NEXT: movzbl %dil, %eax
1873 ; CHECK-NEXT: kmovw %eax, %k1
1874 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1875 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1877 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1881 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1882 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q:
1884 ; CHECK-NEXT: movzbl %dil, %eax
1885 ; CHECK-NEXT: kmovw %eax, %k1
1886 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1888 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1892 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1894 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1895 ; CHECK-LABEL: test_x86_avx512_psra_d:
1897 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0
1899 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1903 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1904 ; CHECK-LABEL: test_x86_avx512_mask_psra_d:
1906 ; CHECK-NEXT: kmovw %edi, %k1
1907 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1908 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1910 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1914 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1915 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d:
1917 ; CHECK-NEXT: kmovw %edi, %k1
1918 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1920 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1924 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1926 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1927 ; CHECK-LABEL: test_x86_avx512_psra_q:
1929 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0
1931 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1935 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1936 ; CHECK-LABEL: test_x86_avx512_mask_psra_q:
1938 ; CHECK-NEXT: movzbl %dil, %eax
1939 ; CHECK-NEXT: kmovw %eax, %k1
1940 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1941 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1943 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1947 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1948 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q:
1950 ; CHECK-NEXT: movzbl %dil, %eax
1951 ; CHECK-NEXT: kmovw %eax, %k1
1952 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1954 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1958 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1960 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1961 ; CHECK-LABEL: test_x86_avx512_psllv_d:
1963 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
1965 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1969 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1970 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d:
1972 ; CHECK-NEXT: kmovw %edi, %k1
1973 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1974 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1976 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1980 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1981 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d:
1983 ; CHECK-NEXT: kmovw %edi, %k1
1984 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1986 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1990 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1992 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1993 ; CHECK-LABEL: test_x86_avx512_psllv_q:
1995 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
1997 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
2001 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
2002 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q:
2004 ; CHECK-NEXT: movzbl %dil, %eax
2005 ; CHECK-NEXT: kmovw %eax, %k1
2006 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
2007 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2009 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
2013 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
2014 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q:
2016 ; CHECK-NEXT: movzbl %dil, %eax
2017 ; CHECK-NEXT: kmovw %eax, %k1
2018 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
2020 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
2024 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
2027 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
2028 ; CHECK-LABEL: test_x86_avx512_psrav_d:
2030 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0
2032 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
2036 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
2037 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d:
2039 ; CHECK-NEXT: kmovw %edi, %k1
2040 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
2041 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2043 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
2047 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
2048 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d:
2050 ; CHECK-NEXT: kmovw %edi, %k1
2051 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
2053 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
2057 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
2059 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
2060 ; CHECK-LABEL: test_x86_avx512_psrav_q:
2062 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0
2064 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
2068 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
2069 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q:
2071 ; CHECK-NEXT: movzbl %dil, %eax
2072 ; CHECK-NEXT: kmovw %eax, %k1
2073 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
2074 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2076 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
2080 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
2081 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q:
2083 ; CHECK-NEXT: movzbl %dil, %eax
2084 ; CHECK-NEXT: kmovw %eax, %k1
2085 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
2087 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
2091 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
2093 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
2094 ; CHECK-LABEL: test_x86_avx512_psrlv_d:
2096 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
2098 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
2102 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
2103 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d:
2105 ; CHECK-NEXT: kmovw %edi, %k1
2106 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
2107 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2109 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
2113 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
2114 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d:
2116 ; CHECK-NEXT: kmovw %edi, %k1
2117 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
2119 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
2123 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
2125 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
2126 ; CHECK-LABEL: test_x86_avx512_psrlv_q:
2128 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
2130 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
2134 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
2135 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q:
2137 ; CHECK-NEXT: movzbl %dil, %eax
2138 ; CHECK-NEXT: kmovw %eax, %k1
2139 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
2140 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2142 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
2146 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
2147 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q:
2149 ; CHECK-NEXT: movzbl %dil, %eax
2150 ; CHECK-NEXT: kmovw %eax, %k1
2151 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
2153 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
2157 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
2159 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
2160 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop:
2162 ; CHECK-NEXT: vpsrlvq (%rdi), %zmm0, %zmm0
2164 %b = load <8 x i64>, <8 x i64>* %ptr
2165 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2169 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2170 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2171 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
2173 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
2174 ; CHECK-LABEL: test_vsubps_rn:
2176 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
2178 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2179 <16 x float> zeroinitializer, i16 -1, i32 0)
2180 ret <16 x float> %res
2183 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
2184 ; CHECK-LABEL: test_vsubps_rd:
2186 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
2188 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2189 <16 x float> zeroinitializer, i16 -1, i32 1)
2190 ret <16 x float> %res
2193 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
2194 ; CHECK-LABEL: test_vsubps_ru:
2196 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
2198 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2199 <16 x float> zeroinitializer, i16 -1, i32 2)
2200 ret <16 x float> %res
2203 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
2204 ; CHECK-LABEL: test_vsubps_rz:
2206 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
2208 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
2209 <16 x float> zeroinitializer, i16 -1, i32 3)
2210 ret <16 x float> %res
2213 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
2214 ; CHECK-LABEL: test_vmulps_rn:
2216 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0
2218 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2219 <16 x float> zeroinitializer, i16 -1, i32 0)
2220 ret <16 x float> %res
2223 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
2224 ; CHECK-LABEL: test_vmulps_rd:
2226 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0
2228 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2229 <16 x float> zeroinitializer, i16 -1, i32 1)
2230 ret <16 x float> %res
2233 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
2234 ; CHECK-LABEL: test_vmulps_ru:
2236 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
2238 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2239 <16 x float> zeroinitializer, i16 -1, i32 2)
2240 ret <16 x float> %res
2243 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
2244 ; CHECK-LABEL: test_vmulps_rz:
2246 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
2248 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2249 <16 x float> zeroinitializer, i16 -1, i32 3)
2250 ret <16 x float> %res
2254 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2255 ; CHECK-LABEL: test_vmulps_mask_rn:
2257 ; CHECK-NEXT: kmovw %edi, %k1
2258 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2260 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2261 <16 x float> zeroinitializer, i16 %mask, i32 0)
2262 ret <16 x float> %res
2265 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2266 ; CHECK-LABEL: test_vmulps_mask_rd:
2268 ; CHECK-NEXT: kmovw %edi, %k1
2269 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2271 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2272 <16 x float> zeroinitializer, i16 %mask, i32 1)
2273 ret <16 x float> %res
2276 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2277 ; CHECK-LABEL: test_vmulps_mask_ru:
2279 ; CHECK-NEXT: kmovw %edi, %k1
2280 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2282 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2283 <16 x float> zeroinitializer, i16 %mask, i32 2)
2284 ret <16 x float> %res
2287 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2288 ; CHECK-LABEL: test_vmulps_mask_rz:
2290 ; CHECK-NEXT: kmovw %edi, %k1
2291 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2293 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2294 <16 x float> zeroinitializer, i16 %mask, i32 3)
2295 ret <16 x float> %res
2298 ;; With Passthru value
2299 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2300 ; CHECK-LABEL: test_vmulps_mask_passthru_rn:
2302 ; CHECK-NEXT: kmovw %edi, %k1
2303 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2304 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2306 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2307 <16 x float> %passthru, i16 %mask, i32 0)
2308 ret <16 x float> %res
2311 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2312 ; CHECK-LABEL: test_vmulps_mask_passthru_rd:
2314 ; CHECK-NEXT: kmovw %edi, %k1
2315 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2316 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2318 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2319 <16 x float> %passthru, i16 %mask, i32 1)
2320 ret <16 x float> %res
2323 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2324 ; CHECK-LABEL: test_vmulps_mask_passthru_ru:
2326 ; CHECK-NEXT: kmovw %edi, %k1
2327 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2328 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2330 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2331 <16 x float> %passthru, i16 %mask, i32 2)
2332 ret <16 x float> %res
2335 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
2336 ; CHECK-LABEL: test_vmulps_mask_passthru_rz:
2338 ; CHECK-NEXT: kmovw %edi, %k1
2339 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2340 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2342 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
2343 <16 x float> %passthru, i16 %mask, i32 3)
2344 ret <16 x float> %res
2348 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2349 ; CHECK-LABEL: test_vmulpd_mask_rn:
2351 ; CHECK-NEXT: movzbl %dil, %eax
2352 ; CHECK-NEXT: kmovw %eax, %k1
2353 ; CHECK-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2355 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2356 <8 x double> zeroinitializer, i8 %mask, i32 0)
2357 ret <8 x double> %res
2360 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2361 ; CHECK-LABEL: test_vmulpd_mask_rd:
2363 ; CHECK-NEXT: movzbl %dil, %eax
2364 ; CHECK-NEXT: kmovw %eax, %k1
2365 ; CHECK-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2367 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2368 <8 x double> zeroinitializer, i8 %mask, i32 1)
2369 ret <8 x double> %res
2372 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2373 ; CHECK-LABEL: test_vmulpd_mask_ru:
2375 ; CHECK-NEXT: movzbl %dil, %eax
2376 ; CHECK-NEXT: kmovw %eax, %k1
2377 ; CHECK-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2379 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2380 <8 x double> zeroinitializer, i8 %mask, i32 2)
2381 ret <8 x double> %res
2384 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
2385 ; CHECK-LABEL: test_vmulpd_mask_rz:
2387 ; CHECK-NEXT: movzbl %dil, %eax
2388 ; CHECK-NEXT: kmovw %eax, %k1
2389 ; CHECK-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2391 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
2392 <8 x double> zeroinitializer, i8 %mask, i32 3)
2393 ret <8 x double> %res
2396 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
2397 ; CHECK-LABEL: test_xor_epi32:
2399 ; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0
2401 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
2402 ret < 16 x i32> %res
2405 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2406 ; CHECK-LABEL: test_mask_xor_epi32:
2408 ; CHECK-NEXT: kmovw %edi, %k1
2409 ; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1}
2410 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2412 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2413 ret < 16 x i32> %res
2416 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2418 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
2419 ; CHECK-LABEL: test_or_epi32:
2421 ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
2423 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
2424 ret < 16 x i32> %res
2427 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2428 ; CHECK-LABEL: test_mask_or_epi32:
2430 ; CHECK-NEXT: kmovw %edi, %k1
2431 ; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1}
2432 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2434 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2435 ret < 16 x i32> %res
2438 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2440 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
2441 ; CHECK-LABEL: test_and_epi32:
2443 ; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0
2445 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
2446 ret < 16 x i32> %res
2449 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2450 ; CHECK-LABEL: test_mask_and_epi32:
2452 ; CHECK-NEXT: kmovw %edi, %k1
2453 ; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1}
2454 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2456 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2457 ret < 16 x i32> %res
2460 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; NOTE(review): autogenerated test (update_llc_test_checks.py) — regenerate CHECK
; lines with that script rather than hand-editing them.
; Section: llvm.x86.avx512.mask.pxor.q.512 — masked 512-bit bitwise XOR of <8 x i64>.
; The i8 mask is zero-extended via movzbl before kmovw (no kmovb on KNL).
2462 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
2463 ; CHECK-LABEL: test_xor_epi64:
2465 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
2467 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
2471 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2472 ; CHECK-LABEL: test_mask_xor_epi64:
2474 ; CHECK-NEXT: movzbl %dil, %eax
2475 ; CHECK-NEXT: kmovw %eax, %k1
2476 ; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1}
2477 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2479 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2483 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; NOTE(review): autogenerated test (update_llc_test_checks.py) — regenerate CHECK
; lines with that script rather than hand-editing them.
; Section: llvm.x86.avx512.mask.por.q.512 — masked 512-bit bitwise OR of <8 x i64>.
2485 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
2486 ; CHECK-LABEL: test_or_epi64:
2488 ; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
2490 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
2494 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2495 ; CHECK-LABEL: test_mask_or_epi64:
2497 ; CHECK-NEXT: movzbl %dil, %eax
2498 ; CHECK-NEXT: kmovw %eax, %k1
2499 ; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1}
2500 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2502 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2506 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; NOTE(review): autogenerated test (update_llc_test_checks.py) — regenerate CHECK
; lines with that script rather than hand-editing them.
; Section: llvm.x86.avx512.mask.pand.q.512 — masked 512-bit bitwise AND of <8 x i64>.
2508 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
2509 ; CHECK-LABEL: test_and_epi64:
2511 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
2513 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
2517 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2518 ; CHECK-LABEL: test_mask_and_epi64:
2520 ; CHECK-NEXT: movzbl %dil, %eax
2521 ; CHECK-NEXT: kmovw %eax, %k1
2522 ; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1}
2523 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2525 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2529 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; NOTE(review): autogenerated test (update_llc_test_checks.py) — regenerate CHECK
; lines with that script rather than hand-editing them.
; Section: llvm.x86.avx512.mask.padd.d.512 — masked <16 x i32> add.
; Suffix legend used throughout: rr = reg/reg, rrk = merge-masked, rrkz =
; zero-masked, rm = reg/mem, rmb = mem broadcast ({1to16} of a scalar i32).
2532 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2533 ; CHECK-LABEL: test_mask_add_epi32_rr:
2535 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
2537 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2538 ret < 16 x i32> %res
2541 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2542 ; CHECK-LABEL: test_mask_add_epi32_rrk:
2544 ; CHECK-NEXT: kmovw %edi, %k1
2545 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1}
2546 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2548 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2549 ret < 16 x i32> %res
2552 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2553 ; CHECK-LABEL: test_mask_add_epi32_rrkz:
2555 ; CHECK-NEXT: kmovw %edi, %k1
2556 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
2558 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2559 ret < 16 x i32> %res
2562 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2563 ; CHECK-LABEL: test_mask_add_epi32_rm:
2565 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
2567 %b = load <16 x i32>, <16 x i32>* %ptr_b
2568 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2569 ret < 16 x i32> %res
2572 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2573 ; CHECK-LABEL: test_mask_add_epi32_rmk:
2575 ; CHECK-NEXT: kmovw %esi, %k1
2576 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1}
2577 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2579 %b = load <16 x i32>, <16 x i32>* %ptr_b
2580 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2581 ret < 16 x i32> %res
2584 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2585 ; CHECK-LABEL: test_mask_add_epi32_rmkz:
2587 ; CHECK-NEXT: kmovw %esi, %k1
2588 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
2590 %b = load <16 x i32>, <16 x i32>* %ptr_b
2591 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2592 ret < 16 x i32> %res
2595 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
2596 ; CHECK-LABEL: test_mask_add_epi32_rmb:
2598 ; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0
2600 %q = load i32, i32* %ptr_b
2601 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2602 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2603 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2604 ret < 16 x i32> %res
2607 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2608 ; CHECK-LABEL: test_mask_add_epi32_rmbk:
2610 ; CHECK-NEXT: kmovw %esi, %k1
2611 ; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
2612 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2614 %q = load i32, i32* %ptr_b
2615 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2616 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2617 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2618 ret < 16 x i32> %res
2621 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2622 ; CHECK-LABEL: test_mask_add_epi32_rmbkz:
2624 ; CHECK-NEXT: kmovw %esi, %k1
2625 ; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
2627 %q = load i32, i32* %ptr_b
2628 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2629 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2630 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2631 ret < 16 x i32> %res
2634 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; NOTE(review): autogenerated test (update_llc_test_checks.py) — regenerate CHECK
; lines with that script rather than hand-editing them.
; Section: llvm.x86.avx512.mask.psub.d.512 — masked <16 x i32> subtract.
; Variants: rr/rrk/rrkz (register), rm/rmk/rmkz (memory), rmb/rmbk/rmbkz
; (scalar-broadcast memory operand, {1to16}).
2636 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2637 ; CHECK-LABEL: test_mask_sub_epi32_rr:
2639 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
2641 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2642 ret < 16 x i32> %res
2645 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
2646 ; CHECK-LABEL: test_mask_sub_epi32_rrk:
2648 ; CHECK-NEXT: kmovw %edi, %k1
2649 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
2650 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2652 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2653 ret < 16 x i32> %res
2656 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
2657 ; CHECK-LABEL: test_mask_sub_epi32_rrkz:
2659 ; CHECK-NEXT: kmovw %edi, %k1
2660 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
2662 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2663 ret < 16 x i32> %res
2666 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2667 ; CHECK-LABEL: test_mask_sub_epi32_rm:
2669 ; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0
2671 %b = load <16 x i32>, <16 x i32>* %ptr_b
2672 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2673 ret < 16 x i32> %res
2676 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2677 ; CHECK-LABEL: test_mask_sub_epi32_rmk:
2679 ; CHECK-NEXT: kmovw %esi, %k1
2680 ; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1}
2681 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2683 %b = load <16 x i32>, <16 x i32>* %ptr_b
2684 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2685 ret < 16 x i32> %res
2688 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
2689 ; CHECK-LABEL: test_mask_sub_epi32_rmkz:
2691 ; CHECK-NEXT: kmovw %esi, %k1
2692 ; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z}
2694 %b = load <16 x i32>, <16 x i32>* %ptr_b
2695 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2696 ret < 16 x i32> %res
2699 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
2700 ; CHECK-LABEL: test_mask_sub_epi32_rmb:
2702 ; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0
2704 %q = load i32, i32* %ptr_b
2705 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2706 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2707 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2708 ret < 16 x i32> %res
2711 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2712 ; CHECK-LABEL: test_mask_sub_epi32_rmbk:
2714 ; CHECK-NEXT: kmovw %esi, %k1
2715 ; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
2716 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2718 %q = load i32, i32* %ptr_b
2719 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2720 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2721 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2722 ret < 16 x i32> %res
2725 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2726 ; CHECK-LABEL: test_mask_sub_epi32_rmbkz:
2728 ; CHECK-NEXT: kmovw %esi, %k1
2729 ; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
2731 %q = load i32, i32* %ptr_b
2732 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2733 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2734 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2735 ret < 16 x i32> %res
2738 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; NOTE(review): autogenerated test (update_llc_test_checks.py) — regenerate CHECK
; lines with that script rather than hand-editing them.
; Section: llvm.x86.avx512.mask.padd.q.512 — masked <8 x i64> add.
; i8 masks are zero-extended (movzbl) before kmovw; broadcast forms use {1to8}.
2740 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
2741 ; CHECK-LABEL: test_mask_add_epi64_rr:
2743 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
2745 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2749 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2750 ; CHECK-LABEL: test_mask_add_epi64_rrk:
2752 ; CHECK-NEXT: movzbl %dil, %eax
2753 ; CHECK-NEXT: kmovw %eax, %k1
2754 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1}
2755 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2757 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2761 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2762 ; CHECK-LABEL: test_mask_add_epi64_rrkz:
2764 ; CHECK-NEXT: movzbl %dil, %eax
2765 ; CHECK-NEXT: kmovw %eax, %k1
2766 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z}
2768 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2772 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
2773 ; CHECK-LABEL: test_mask_add_epi64_rm:
2775 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
2777 %b = load <8 x i64>, <8 x i64>* %ptr_b
2778 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2782 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2783 ; CHECK-LABEL: test_mask_add_epi64_rmk:
2785 ; CHECK-NEXT: movzbl %sil, %eax
2786 ; CHECK-NEXT: kmovw %eax, %k1
2787 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1}
2788 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2790 %b = load <8 x i64>, <8 x i64>* %ptr_b
2791 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2795 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2796 ; CHECK-LABEL: test_mask_add_epi64_rmkz:
2798 ; CHECK-NEXT: movzbl %sil, %eax
2799 ; CHECK-NEXT: kmovw %eax, %k1
2800 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z}
2802 %b = load <8 x i64>, <8 x i64>* %ptr_b
2803 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2807 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2808 ; CHECK-LABEL: test_mask_add_epi64_rmb:
2810 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
2812 %q = load i64, i64* %ptr_b
2813 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2814 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2815 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2819 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2820 ; CHECK-LABEL: test_mask_add_epi64_rmbk:
2822 ; CHECK-NEXT: movzbl %sil, %eax
2823 ; CHECK-NEXT: kmovw %eax, %k1
2824 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
2825 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2827 %q = load i64, i64* %ptr_b
2828 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2829 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2830 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2834 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2835 ; CHECK-LABEL: test_mask_add_epi64_rmbkz:
2837 ; CHECK-NEXT: movzbl %sil, %eax
2838 ; CHECK-NEXT: kmovw %eax, %k1
2839 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
2841 %q = load i64, i64* %ptr_b
2842 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2843 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2844 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2848 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; NOTE(review): autogenerated test (update_llc_test_checks.py) — regenerate CHECK
; lines with that script rather than hand-editing them.
; Section: llvm.x86.avx512.mask.psub.q.512 — masked <8 x i64> subtract.
; Same rr/rm/rmb variant scheme as the padd.q tests above it in the file.
2850 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
2851 ; CHECK-LABEL: test_mask_sub_epi64_rr:
2853 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
2855 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2859 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2860 ; CHECK-LABEL: test_mask_sub_epi64_rrk:
2862 ; CHECK-NEXT: movzbl %dil, %eax
2863 ; CHECK-NEXT: kmovw %eax, %k1
2864 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1}
2865 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2867 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2871 define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
2872 ; CHECK-LABEL: test_mask_sub_epi64_rrkz:
2874 ; CHECK-NEXT: movzbl %dil, %eax
2875 ; CHECK-NEXT: kmovw %eax, %k1
2876 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
2878 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2882 define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
2883 ; CHECK-LABEL: test_mask_sub_epi64_rm:
2885 ; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0
2887 %b = load <8 x i64>, <8 x i64>* %ptr_b
2888 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2892 define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2893 ; CHECK-LABEL: test_mask_sub_epi64_rmk:
2895 ; CHECK-NEXT: movzbl %sil, %eax
2896 ; CHECK-NEXT: kmovw %eax, %k1
2897 ; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1}
2898 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2900 %b = load <8 x i64>, <8 x i64>* %ptr_b
2901 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2905 define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
2906 ; CHECK-LABEL: test_mask_sub_epi64_rmkz:
2908 ; CHECK-NEXT: movzbl %sil, %eax
2909 ; CHECK-NEXT: kmovw %eax, %k1
2910 ; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z}
2912 %b = load <8 x i64>, <8 x i64>* %ptr_b
2913 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2917 define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
2918 ; CHECK-LABEL: test_mask_sub_epi64_rmb:
2920 ; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0
2922 %q = load i64, i64* %ptr_b
2923 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2924 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2925 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2929 define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2930 ; CHECK-LABEL: test_mask_sub_epi64_rmbk:
2932 ; CHECK-NEXT: movzbl %sil, %eax
2933 ; CHECK-NEXT: kmovw %eax, %k1
2934 ; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
2935 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2937 %q = load i64, i64* %ptr_b
2938 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2939 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2940 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2944 define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2945 ; CHECK-LABEL: test_mask_sub_epi64_rmbkz:
2947 ; CHECK-NEXT: movzbl %sil, %eax
2948 ; CHECK-NEXT: kmovw %eax, %k1
2949 ; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
2951 %q = load i64, i64* %ptr_b
2952 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2953 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2954 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
2958 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; NOTE(review): autogenerated test (update_llc_test_checks.py) — regenerate CHECK
; lines with that script rather than hand-editing them.
; Section: llvm.x86.avx512.mask.pmul.dq.512 (vpmuldq) — signed multiply of the
; even i32 lanes producing <8 x i64>. Broadcast forms load a scalar i64 and
; bitcast the splat to <16 x i32> to feed the intrinsic.
2960 define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
2961 ; CHECK-LABEL: test_mask_mul_epi32_rr:
2963 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0
2965 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
2969 define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
2970 ; CHECK-LABEL: test_mask_mul_epi32_rrk:
2972 ; CHECK-NEXT: movzbl %dil, %eax
2973 ; CHECK-NEXT: kmovw %eax, %k1
2974 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1}
2975 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2977 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
2981 define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
2982 ; CHECK-LABEL: test_mask_mul_epi32_rrkz:
2984 ; CHECK-NEXT: movzbl %dil, %eax
2985 ; CHECK-NEXT: kmovw %eax, %k1
2986 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z}
2988 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
2992 define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
2993 ; CHECK-LABEL: test_mask_mul_epi32_rm:
2995 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0
2997 %b = load <16 x i32>, <16 x i32>* %ptr_b
2998 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3002 define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
3003 ; CHECK-LABEL: test_mask_mul_epi32_rmk:
3005 ; CHECK-NEXT: movzbl %sil, %eax
3006 ; CHECK-NEXT: kmovw %eax, %k1
3007 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1}
3008 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3010 %b = load <16 x i32>, <16 x i32>* %ptr_b
3011 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3015 define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
3016 ; CHECK-LABEL: test_mask_mul_epi32_rmkz:
3018 ; CHECK-NEXT: movzbl %sil, %eax
3019 ; CHECK-NEXT: kmovw %eax, %k1
3020 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z}
3022 %b = load <16 x i32>, <16 x i32>* %ptr_b
3023 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3027 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
3028 ; CHECK-LABEL: test_mask_mul_epi32_rmb:
3030 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0
3032 %q = load i64, i64* %ptr_b
3033 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3034 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3035 %b = bitcast <8 x i64> %b64 to <16 x i32>
3036 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3040 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
3041 ; CHECK-LABEL: test_mask_mul_epi32_rmbk:
3043 ; CHECK-NEXT: movzbl %sil, %eax
3044 ; CHECK-NEXT: kmovw %eax, %k1
3045 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
3046 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3048 %q = load i64, i64* %ptr_b
3049 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3050 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3051 %b = bitcast <8 x i64> %b64 to <16 x i32>
3052 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3056 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
3057 ; CHECK-LABEL: test_mask_mul_epi32_rmbkz:
3059 ; CHECK-NEXT: movzbl %sil, %eax
3060 ; CHECK-NEXT: kmovw %eax, %k1
3061 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
3063 %q = load i64, i64* %ptr_b
3064 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3065 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3066 %b = bitcast <8 x i64> %b64 to <16 x i32>
3067 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3071 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; NOTE(review): autogenerated test (update_llc_test_checks.py) — regenerate CHECK
; lines with that script rather than hand-editing them.
; Section: llvm.x86.avx512.mask.pmulu.dq.512 (vpmuludq) — unsigned counterpart
; of the pmul.dq tests: even i32 lanes multiplied into <8 x i64>.
3073 define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
3074 ; CHECK-LABEL: test_mask_mul_epu32_rr:
3076 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
3078 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3082 define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
3083 ; CHECK-LABEL: test_mask_mul_epu32_rrk:
3085 ; CHECK-NEXT: movzbl %dil, %eax
3086 ; CHECK-NEXT: kmovw %eax, %k1
3087 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1}
3088 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3090 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3094 define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
3095 ; CHECK-LABEL: test_mask_mul_epu32_rrkz:
3097 ; CHECK-NEXT: movzbl %dil, %eax
3098 ; CHECK-NEXT: kmovw %eax, %k1
3099 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z}
3101 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3105 define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
3106 ; CHECK-LABEL: test_mask_mul_epu32_rm:
3108 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0
3110 %b = load <16 x i32>, <16 x i32>* %ptr_b
3111 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3115 define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
3116 ; CHECK-LABEL: test_mask_mul_epu32_rmk:
3118 ; CHECK-NEXT: movzbl %sil, %eax
3119 ; CHECK-NEXT: kmovw %eax, %k1
3120 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1}
3121 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3123 %b = load <16 x i32>, <16 x i32>* %ptr_b
3124 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3128 define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
3129 ; CHECK-LABEL: test_mask_mul_epu32_rmkz:
3131 ; CHECK-NEXT: movzbl %sil, %eax
3132 ; CHECK-NEXT: kmovw %eax, %k1
3133 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z}
3135 %b = load <16 x i32>, <16 x i32>* %ptr_b
3136 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3140 define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
3141 ; CHECK-LABEL: test_mask_mul_epu32_rmb:
3143 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0
3145 %q = load i64, i64* %ptr_b
3146 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3147 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3148 %b = bitcast <8 x i64> %b64 to <16 x i32>
3149 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
3153 define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
3154 ; CHECK-LABEL: test_mask_mul_epu32_rmbk:
3156 ; CHECK-NEXT: movzbl %sil, %eax
3157 ; CHECK-NEXT: kmovw %eax, %k1
3158 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
3159 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3161 %q = load i64, i64* %ptr_b
3162 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3163 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3164 %b = bitcast <8 x i64> %b64 to <16 x i32>
3165 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
3169 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
3170 ; CHECK-LABEL: test_mask_mul_epu32_rmbkz:
3172 ; CHECK-NEXT: movzbl %sil, %eax
3173 ; CHECK-NEXT: kmovw %eax, %k1
3174 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
3176 %q = load i64, i64* %ptr_b
3177 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
3178 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
3179 %b = bitcast <8 x i64> %b64 to <16 x i32>
3180 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
3184 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; NOTE(review): autogenerated test (update_llc_test_checks.py) — regenerate CHECK
; lines with that script rather than hand-editing them.
; Section: llvm.x86.avx512.mask.pmull.d.512 (vpmulld) — masked low-32-bit
; multiply of <16 x i32>, all rr/rm/rmb variants.
3186 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
3187 ; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
3189 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
3191 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
3195 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
3196 ; CHECK-LABEL: test_mask_mullo_epi32_rrk_512:
3198 ; CHECK-NEXT: kmovw %edi, %k1
3199 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1}
3200 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3202 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
3203 ret < 16 x i32> %res
3206 define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
3207 ; CHECK-LABEL: test_mask_mullo_epi32_rrkz_512:
3209 ; CHECK-NEXT: kmovw %edi, %k1
3210 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z}
3212 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
3213 ret < 16 x i32> %res
3216 define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
3217 ; CHECK-LABEL: test_mask_mullo_epi32_rm_512:
3219 ; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0
3221 %b = load <16 x i32>, <16 x i32>* %ptr_b
3222 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
3223 ret < 16 x i32> %res
3226 define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
3227 ; CHECK-LABEL: test_mask_mullo_epi32_rmk_512:
3229 ; CHECK-NEXT: kmovw %esi, %k1
3230 ; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1}
3231 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3233 %b = load <16 x i32>, <16 x i32>* %ptr_b
3234 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
3235 ret < 16 x i32> %res
3238 define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
3239 ; CHECK-LABEL: test_mask_mullo_epi32_rmkz_512:
3241 ; CHECK-NEXT: kmovw %esi, %k1
3242 ; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z}
3244 %b = load <16 x i32>, <16 x i32>* %ptr_b
3245 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
3246 ret < 16 x i32> %res
3249 define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
3250 ; CHECK-LABEL: test_mask_mullo_epi32_rmb_512:
3252 ; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0
3254 %q = load i32, i32* %ptr_b
3255 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
3256 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
3257 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
3258 ret < 16 x i32> %res
3261 define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
3262 ; CHECK-LABEL: test_mask_mullo_epi32_rmbk_512:
3264 ; CHECK-NEXT: kmovw %esi, %k1
3265 ; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1}
3266 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
3268 %q = load i32, i32* %ptr_b
3269 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
3270 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
3271 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
3272 ret < 16 x i32> %res
3275 define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
3276 ; CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512:
3278 ; CHECK-NEXT: kmovw %esi, %k1
3279 ; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
3281 %q = load i32, i32* %ptr_b
3282 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
3283 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
3284 %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
3285 ret < 16 x i32> %res
3288 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; NOTE(review): autogenerated test (update_llc_test_checks.py) — regenerate CHECK
; lines with that script rather than hand-editing them.
; Section: llvm.x86.avx512.mask.add.ps.512 with explicit rounding control.
; The last i32 argument selects the embedded rounding mode: 0 = {rn-sae},
; 1 = {rd-sae}, 2 = {ru-sae}, 3 = {rz-sae} (4 = current mode, tested below).
3290 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3291 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
3293 ; CHECK-NEXT: kmovw %edi, %k1
3294 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3296 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
3297 ret <16 x float> %res
3299 define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3300 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
3302 ; CHECK-NEXT: kmovw %edi, %k1
3303 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3305 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
3306 ret <16 x float> %res
3308 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3309 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
3311 ; CHECK-NEXT: kmovw %edi, %k1
3312 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3314 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
3315 ret <16 x float> %res
3318 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3319 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
3321 ; CHECK-NEXT: kmovw %edi, %k1
3322 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3324 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
3325 ret <16 x float> %res
3329 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3330 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_current:
3332 ; CHECK-NEXT: kmovw %edi, %k1
3333 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
3335 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
3336 ret <16 x float> %res
3339 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3340 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae:
3342 ; CHECK-NEXT: kmovw %edi, %k1
3343 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3344 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3346 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
3347 ret <16 x float> %res
3349 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3350 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae:
3352 ; CHECK-NEXT: kmovw %edi, %k1
3353 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3354 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3356 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
3357 ret <16 x float> %res
3359 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3360 ; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae:
3362 ; CHECK-NEXT: kmovw %edi, %k1
3363 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3364 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3366 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
3367 ret <16 x float> %res
3370 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3371 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae:
3373 ; CHECK-NEXT: kmovw %edi, %k1
3374 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3375 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3377 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
3378 ret <16 x float> %res
3382 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3383 ; CHECK-LABEL: test_mm512_mask_add_round_ps_current:
3385 ; CHECK-NEXT: kmovw %edi, %k1
3386 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1}
3387 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3389 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
3390 ret <16 x float> %res
3394 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3395 ; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
3397 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
3399 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
3400 ret <16 x float> %res
3402 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3403 ; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
3405 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
3407 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
3408 ret <16 x float> %res
3410 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3411 ; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
3413 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
3415 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
3416 ret <16 x float> %res
3419 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3420 ; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
3422 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
3424 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
3425 ret <16 x float> %res
3428 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3429 ; CHECK-LABEL: test_mm512_add_round_ps_current:
3431 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
3433 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
3434 ret <16 x float> %res
3436 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; llvm.x86.avx512.mask.sub.ps.512 rounding-mode tests, mirroring the add.ps
; group above (autogenerated CHECK assertions -- regenerate, do not hand-edit).
; Rounding argument: 0 {rn-sae}, 1 {rd-sae}, 2 {ru-sae}, 3 {rz-sae}, 4 current.
;
; Merge-masked forms: blend into %src (%zmm2) under %k1, then move to %zmm0.
3438 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3439 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
3441 ; CHECK-NEXT: kmovw %edi, %k1
3442 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3443 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3445 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
3446 ret <16 x float> %res
3448 define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3449 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
3451 ; CHECK-NEXT: kmovw %edi, %k1
3452 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3453 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3455 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
3456 ret <16 x float> %res
3458 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3459 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
3461 ; CHECK-NEXT: kmovw %edi, %k1
3462 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3463 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3465 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
3466 ret <16 x float> %res
3469 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3470 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
3472 ; CHECK-NEXT: kmovw %edi, %k1
3473 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3474 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3476 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
3477 ret <16 x float> %res
3481 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3482 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_current:
3484 ; CHECK-NEXT: kmovw %edi, %k1
3485 ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1}
3486 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3488 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
3489 ret <16 x float> %res
; Unmasked forms (call passes mask = -1; the %mask parameter is unused).
3492 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3493 ; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
3495 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
3497 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
3498 ret <16 x float> %res
3500 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3501 ; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
3503 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
3505 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
3506 ret <16 x float> %res
3508 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3509 ; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
3511 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
3513 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
3514 ret <16 x float> %res
3517 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3518 ; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
3520 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
3522 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
3523 ret <16 x float> %res
3526 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3527 ; CHECK-LABEL: test_mm512_sub_round_ps_current:
3529 ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0
3531 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
3532 ret <16 x float> %res
; llvm.x86.avx512.mask.div.ps.512 rounding-mode tests (autogenerated CHECK
; assertions -- regenerate, do not hand-edit). Same structure as the add/sub
; groups: rounding argument 0..3 selects {rn/rd/ru/rz-sae}, 4 = current.
;
; Zero-masked forms ({z}, pass-through is zeroinitializer).
3535 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3536 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
3538 ; CHECK-NEXT: kmovw %edi, %k1
3539 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3541 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
3542 ret <16 x float> %res
3544 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3545 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
3547 ; CHECK-NEXT: kmovw %edi, %k1
3548 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3550 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
3551 ret <16 x float> %res
3553 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3554 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
3556 ; CHECK-NEXT: kmovw %edi, %k1
3557 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3559 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
3560 ret <16 x float> %res
3563 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3564 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
3566 ; CHECK-NEXT: kmovw %edi, %k1
3567 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3569 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
3570 ret <16 x float> %res
3574 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3575 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_current:
3577 ; CHECK-NEXT: kmovw %edi, %k1
3578 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
3580 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
3581 ret <16 x float> %res
; Merge-masked forms (blend into %src under %k1, copy back to %zmm0).
3584 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3585 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae:
3587 ; CHECK-NEXT: kmovw %edi, %k1
3588 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3589 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3591 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
3592 ret <16 x float> %res
3594 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3595 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae:
3597 ; CHECK-NEXT: kmovw %edi, %k1
3598 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3599 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3601 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
3602 ret <16 x float> %res
3604 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3605 ; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae:
3607 ; CHECK-NEXT: kmovw %edi, %k1
3608 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3609 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3611 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
3612 ret <16 x float> %res
3615 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3616 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae:
3618 ; CHECK-NEXT: kmovw %edi, %k1
3619 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
3620 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3622 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
3623 ret <16 x float> %res
3627 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3628 ; CHECK-LABEL: test_mm512_mask_div_round_ps_current:
3630 ; CHECK-NEXT: kmovw %edi, %k1
3631 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1}
3632 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3634 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
3635 ret <16 x float> %res
; Unmasked forms (call passes mask = -1; the %mask parameter is unused).
3639 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3640 ; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
3642 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
3644 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
3645 ret <16 x float> %res
3647 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3648 ; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
3650 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
3652 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
3653 ret <16 x float> %res
3655 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3656 ; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
3658 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
3660 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
3661 ret <16 x float> %res
3664 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3665 ; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
3667 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
3669 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
3670 ret <16 x float> %res
3673 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3674 ; CHECK-LABEL: test_mm512_div_round_ps_current:
3676 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0
3678 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
3679 ret <16 x float> %res
3681 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; llvm.x86.avx512.mask.min.ps.512 tests (autogenerated CHECK assertions --
; regenerate, do not hand-edit). min/max take no rounding mode, only
; suppress-all-exceptions: rounding argument 8 emits {sae}, 4 = current
; (no suffix). Variants: zero-masked, merge-masked, and unmasked (mask = -1).
3683 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3684 ; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae:
3686 ; CHECK-NEXT: kmovw %edi, %k1
3687 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3689 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
3690 ret <16 x float> %res
3693 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3694 ; CHECK-LABEL: test_mm512_maskz_min_round_ps_current:
3696 ; CHECK-NEXT: kmovw %edi, %k1
3697 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
3699 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
3700 ret <16 x float> %res
3703 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3704 ; CHECK-LABEL: test_mm512_mask_min_round_ps_sae:
3706 ; CHECK-NEXT: kmovw %edi, %k1
3707 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
3708 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3710 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
3711 ret <16 x float> %res
3714 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3715 ; CHECK-LABEL: test_mm512_mask_min_round_ps_current:
3717 ; CHECK-NEXT: kmovw %edi, %k1
3718 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1}
3719 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3721 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
3722 ret <16 x float> %res
3725 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3726 ; CHECK-LABEL: test_mm512_min_round_ps_sae:
3728 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0
3730 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
3731 ret <16 x float> %res
3734 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3735 ; CHECK-LABEL: test_mm512_min_round_ps_current:
3737 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0
3739 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
3740 ret <16 x float> %res
3742 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; llvm.x86.avx512.mask.max.ps.512 tests, mirroring the min.ps group above
; (autogenerated CHECK assertions -- regenerate, do not hand-edit).
; Rounding argument: 8 emits {sae}, 4 = current (no suffix).
3744 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3745 ; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae:
3747 ; CHECK-NEXT: kmovw %edi, %k1
3748 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
3750 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
3751 ret <16 x float> %res
3754 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3755 ; CHECK-LABEL: test_mm512_maskz_max_round_ps_current:
3757 ; CHECK-NEXT: kmovw %edi, %k1
3758 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
3760 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
3761 ret <16 x float> %res
3764 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3765 ; CHECK-LABEL: test_mm512_mask_max_round_ps_sae:
3767 ; CHECK-NEXT: kmovw %edi, %k1
3768 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
3769 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3771 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
3772 ret <16 x float> %res
3775 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
3776 ; CHECK-LABEL: test_mm512_mask_max_round_ps_current:
3778 ; CHECK-NEXT: kmovw %edi, %k1
3779 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
3780 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3782 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
3783 ret <16 x float> %res
3786 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3787 ; CHECK-LABEL: test_mm512_max_round_ps_sae:
3789 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0
3791 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
3792 ret <16 x float> %res
3795 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
3796 ; CHECK-LABEL: test_mm512_max_round_ps_current:
3798 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0
3800 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
3801 ret <16 x float> %res
3803 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; llvm.x86.avx512.mask.add.ss.round scalar-single tests (autogenerated CHECK
; assertions -- regenerate, do not hand-edit). Only element 0 is computed, so
; codegen keeps just bit 0 of the i8 mask (the "andl $1, %edi" below).
; Rounding argument: 0..3 select {rn/rd/ru/rz-sae}, 4 = current rounding.
3805 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
; Merge-masked forms: blend into %a2 (%xmm2) under %k1, copy back to %xmm0.
3807 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3808 ; CHECK-LABEL: test_mask_add_ss_rn:
3810 ; CHECK-NEXT: andl $1, %edi
3811 ; CHECK-NEXT: kmovw %edi, %k1
3812 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3813 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3815 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
3816 ret <4 x float> %res
3819 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3820 ; CHECK-LABEL: test_mask_add_ss_rd:
3822 ; CHECK-NEXT: andl $1, %edi
3823 ; CHECK-NEXT: kmovw %edi, %k1
3824 ; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3825 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3827 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
3828 ret <4 x float> %res
3831 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3832 ; CHECK-LABEL: test_mask_add_ss_ru:
3834 ; CHECK-NEXT: andl $1, %edi
3835 ; CHECK-NEXT: kmovw %edi, %k1
3836 ; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3837 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3839 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
3840 ret <4 x float> %res
3843 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3844 ; CHECK-LABEL: test_mask_add_ss_rz:
3846 ; CHECK-NEXT: andl $1, %edi
3847 ; CHECK-NEXT: kmovw %edi, %k1
3848 ; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3849 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3851 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
3852 ret <4 x float> %res
3855 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3856 ; CHECK-LABEL: test_mask_add_ss_current:
3858 ; CHECK-NEXT: andl $1, %edi
3859 ; CHECK-NEXT: kmovw %edi, %k1
3860 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
3861 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3863 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
3864 ret <4 x float> %res
; Zero-masked form ({z}, pass-through is zeroinitializer).
3867 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
3868 ; CHECK-LABEL: test_maskz_add_ss_rn:
3870 ; CHECK-NEXT: andl $1, %edi
3871 ; CHECK-NEXT: kmovw %edi, %k1
3872 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
3874 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
3875 ret <4 x float> %res
; Unmasked form (mask = -1): no kmovw/opmask expected.
3878 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
3879 ; CHECK-LABEL: test_add_ss_rn:
3881 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
3883 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
3884 ret <4 x float> %res
; llvm.x86.avx512.mask.add.sd.round scalar-double tests, mirroring the add.ss
; group above (autogenerated CHECK assertions -- regenerate, do not hand-edit).
; Only bit 0 of the i8 mask is used (scalar op, one lane).
3887 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
; Merge-masked forms: blend into %a2 (%xmm2) under %k1, copy back to %xmm0.
3889 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3890 ; CHECK-LABEL: test_mask_add_sd_rn:
3892 ; CHECK-NEXT: andl $1, %edi
3893 ; CHECK-NEXT: kmovw %edi, %k1
3894 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3895 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3897 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
3898 ret <2 x double> %res
3901 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3902 ; CHECK-LABEL: test_mask_add_sd_rd:
3904 ; CHECK-NEXT: andl $1, %edi
3905 ; CHECK-NEXT: kmovw %edi, %k1
3906 ; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3907 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3909 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
3910 ret <2 x double> %res
3913 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3914 ; CHECK-LABEL: test_mask_add_sd_ru:
3916 ; CHECK-NEXT: andl $1, %edi
3917 ; CHECK-NEXT: kmovw %edi, %k1
3918 ; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3919 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3921 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
3922 ret <2 x double> %res
3925 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3926 ; CHECK-LABEL: test_mask_add_sd_rz:
3928 ; CHECK-NEXT: andl $1, %edi
3929 ; CHECK-NEXT: kmovw %edi, %k1
3930 ; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3931 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3933 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
3934 ret <2 x double> %res
3937 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3938 ; CHECK-LABEL: test_mask_add_sd_current:
3940 ; CHECK-NEXT: andl $1, %edi
3941 ; CHECK-NEXT: kmovw %edi, %k1
3942 ; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
3943 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3945 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
3946 ret <2 x double> %res
; Zero-masked form ({z}, pass-through is zeroinitializer).
3949 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
3950 ; CHECK-LABEL: test_maskz_add_sd_rn:
3952 ; CHECK-NEXT: andl $1, %edi
3953 ; CHECK-NEXT: kmovw %edi, %k1
3954 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
3956 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
3957 ret <2 x double> %res
; Unmasked form (mask = -1): no kmovw/opmask expected.
3960 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
3961 ; CHECK-LABEL: test_add_sd_rn:
3963 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
3965 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
3966 ret <2 x double> %res
; llvm.x86.avx512.mask.max.ss.round scalar-single tests (autogenerated CHECK
; assertions -- regenerate, do not hand-edit). max takes no rounding mode:
; argument 8 emits {sae}, 4 = current (no suffix). Only bit 0 of the i8 mask
; is used (scalar op), hence the "andl $1, %edi" in the masked variants.
3969 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
3971 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3972 ; CHECK-LABEL: test_mask_max_ss_sae:
3974 ; CHECK-NEXT: andl $1, %edi
3975 ; CHECK-NEXT: kmovw %edi, %k1
3976 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
3977 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3979 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
3980 ret <4 x float> %res
3983 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
3984 ; CHECK-LABEL: test_maskz_max_ss_sae:
3986 ; CHECK-NEXT: andl $1, %edi
3987 ; CHECK-NEXT: kmovw %edi, %k1
3988 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
3990 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
3991 ret <4 x float> %res
3994 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
3995 ; CHECK-LABEL: test_max_ss_sae:
3997 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0
3999 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
4000 ret <4 x float> %res
; Current-rounding (argument 4) variants of the same three forms.
4003 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
4004 ; CHECK-LABEL: test_mask_max_ss:
4006 ; CHECK-NEXT: andl $1, %edi
4007 ; CHECK-NEXT: kmovw %edi, %k1
4008 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
4009 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
4011 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
4012 ret <4 x float> %res
4015 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
4016 ; CHECK-LABEL: test_maskz_max_ss:
4018 ; CHECK-NEXT: andl $1, %edi
4019 ; CHECK-NEXT: kmovw %edi, %k1
4020 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
4022 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
4023 ret <4 x float> %res
4026 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
4027 ; CHECK-LABEL: test_max_ss:
4029 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
4031 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
4032 ret <4 x float> %res
; Double-precision counterpart of the max.ss.round tests above: the same six
; mask/rounding combinations for @llvm.x86.avx512.mask.max.sd.round.
declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_sd_sae:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
ret <2 x double> %res
define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_sd_sae:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
ret <2 x double> %res
define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_max_sd_sae:
; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
ret <2 x double> %res
define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_max_sd:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovaps %zmm2, %zmm0
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
ret <2 x double> %res
define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_maskz_max_sd:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
ret <2 x double> %res
define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_max_sd:
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
ret <2 x double> %res
; Signed int -> scalar FP conversions with an explicit rounding mode:
; rounding arg i32 3 selects round-toward-zero, shown as {rz-sae} in the
; embedded-rounding operand of vcvtsi2sd/vcvtsi2ss.
define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
%res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
%res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; Unsigned int -> float conversions (vcvtusi2ss). Rounding arg i32 1 selects
; round-down, shown as {rd-sae}; i32 4 means current rounding mode (no embedded
; rounding operand). The *_mem variants additionally test the memory operand
; being folded (or not, when an embedded rounding mode forces a register form).
define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
; With {rd-sae} the load cannot be folded: expect a separate movl then the
; register form of vcvtusi2ssl.
define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
%b = load i32, i32* %ptr
%res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
; Current rounding mode: the load IS folded into vcvtusi2ssl.
define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
%b = load i32, i32* %ptr
%res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
; CHECK-LABEL: _mm_cvt_roundu64_ss:
; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
; CHECK-LABEL: _mm_cvtu64_ss:
; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
; Unsigned int -> double conversions (vcvtusi2sd). The 32-bit intrinsic takes
; no rounding argument (i32->f64 is exact).
define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
%res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
; NOTE(review): these two names look swapped — the "_cvtu64_sd" test passes
; rounding arg 1 ({rd-sae}) while the "_cvt_roundu64_sd" test passes 4
; (current rounding). Behavior checked is self-consistent; verify naming intent.
define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
%res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
%res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
; Unmasked (mask = -1) forms of the 512-bit integer min/max intrinsics:
; expect the plain vpmaxsq/vpminud/vpmaxsd instructions with no mask operand.
define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_vpmaxq:
; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
%res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
<8 x i64>zeroinitializer, i8 -1)
declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_vpminud:
; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0
%res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
<16 x i32>zeroinitializer, i16 -1)
declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_vpmaxsd:
; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
%res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
<16 x i32>zeroinitializer, i16 -1)
declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked + unmasked pmaxs: each test issues the intrinsic twice (once with the
; live mask, once with -1) and adds the results so both codegen forms appear.
define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
%res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
; i8 mask is zero-extended (movzbl) before being moved into a mask register.
define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
%res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
; Masked + unmasked unsigned max (vpmaxud / vpmaxuq), same add-the-two-results
; pattern as the signed tests above.
declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
%res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
%res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
; Masked + unmasked signed min (vpminsd / vpminsq).
declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
%res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmins_q_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
%res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
; Masked + unmasked unsigned min (vpminud / vpminuq); the pminu.d declare
; appears earlier in the file next to test_vpminud.
define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
%res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pminu_q_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
%res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
; vpermi2var (two-source permute, indices in the middle operand) for d/pd/ps/q
; element types, masked and unmasked. The masked form merges into a copy of
; the index register (hence the vmovaps into zmm3 first).
declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3
; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2d %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
%x2 = load <16 x i32>, <16 x i32>* %x2p
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3
; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0
%res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
%res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
; Zero-masking vpermt2var (two-source permute, indices in the first operand)
; for d/pd/ps/q. The d test exercises a memory second source; the pd test
; exercises a {1to8} broadcast memory operand built from a scalar load.
declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm2
; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0
%x2 = load <16 x i32>, <16 x i32>* %x2p
%res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm2
; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1
; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0
%x2s = load double, double* %x2ptr
%x2ins = insertelement <8 x double> undef, double %x2s, i32 0
%x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
%res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3
; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
%res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3
; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
%res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
; Merge-masking vpermt2var.d, masked and unmasked, results added together.
declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm1, %zmm3
; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
; vscalefpd/vscalefps with embedded rounding: the masked call uses a non-default
; rounding mode ({rz-sae} for pd, {ru-sae} for ps) and the unmasked call uses
; {rn-sae} (rounding arg 0).
declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vscalefpd {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vscalefpd {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
%res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vscalefps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vscalefps {rn-sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
%res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
; FP unpack-high/unpack-low intrinsics (vunpckhpd/vunpckhps/vunpcklpd/vunpcklps),
; masked and unmasked; the expected shuffle patterns are spelled out in the
; autogenerated {{.*#+}} comments.
declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
%res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
%res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
%res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
; Integer unpack intrinsics (vpunpcklqdq/vpunpckhqdq/vpunpckhdq/vpunpckldq).
; The punpcklqd test additionally exercises zero-masking (zeroinitializer
; passthru) alongside merge-masked and unmasked forms.
declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm3 = k1[0],zmm0[0],k1[2],zmm0[2],k1[4],zmm0[4],k1[6],zmm0[6]
; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
%res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res2, %res3
declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
%res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
%res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpunpckldq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
; CHECK-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
%res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
; pmov.qb.512: truncate q->b, register form (merge/zero/unmasked) and memory form.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqb %zmm0, (%rdi)
; CHECK-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
; pmovs.qb.512: signed-saturating truncate q->b, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
; CHECK-NEXT: vpmovsqb %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
; pmovus.qb.512: unsigned-saturating truncate q->b, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
; CHECK-NEXT: vpmovusqb %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
; pmov.qw.512: truncate q->w, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovqw %zmm0, %xmm0
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
  %res3 = add <8 x i16> %res0, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqw %zmm0, (%rdi)
; CHECK-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
; pmovs.qw.512: signed-saturating truncate q->w, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
  %res3 = add <8 x i16> %res0, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
; CHECK-NEXT: vpmovsqw %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
; pmovus.qw.512: unsigned-saturating truncate q->w, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
  %res3 = add <8 x i16> %res0, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
; CHECK-NEXT: vpmovusqw %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
; pmov.qd.512: truncate q->d, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovqd %zmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res3 = add <8 x i32> %res0, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovqd %zmm0, (%rdi)
; CHECK-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
; pmovs.qd.512: signed-saturating truncate q->d, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovsqd %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res3 = add <8 x i32> %res0, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
; CHECK-NEXT: vpmovsqd %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
; pmovus.qd.512: unsigned-saturating truncate q->d, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vpmovusqd %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
  %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res3 = add <8 x i32> %res0, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)

define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
; CHECK-NEXT: vpmovusqd %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
; pmov.db.512: truncate d->b, register and memory forms (i16 mask).
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovdb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovdb %zmm0, (%rdi)
; CHECK-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}
; pmovs.db.512: signed-saturating truncate d->b, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
; CHECK-NEXT: vpmovsdb %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}
; pmovus.db.512: unsigned-saturating truncate d->b, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
; CHECK-NEXT: vpmovusdb %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}
; pmov.dw.512: truncate d->w, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret void'/'}' restored so the IR parses.
declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovdw %zmm0, %ymm0
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
  %res3 = add <16 x i16> %res0, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovdw %zmm0, (%rdi)
; CHECK-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}
; pmovs.dw.512: signed-saturating truncate d->w, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret void'/'}' restored so the IR parses.
declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
  %res3 = add <16 x i16> %res0, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
; CHECK-NEXT: vpmovsdw %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}
; pmovus.dw.512: unsigned-saturating truncate d->w, register and memory forms.
; NOTE(review): stray listing numbers stripped; elided 'ret void'/'}' restored so the IR parses.
declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
  %res3 = add <16 x i16> %res0, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)

define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
; CHECK-NEXT: vpmovusdw %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}
; cvtdq2pd.512: signed dword -> double conversion, masked and unmasked results summed.
; NOTE(review): stray listing numbers stripped; elided closing '}' restored so the IR parses.
declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}
; cvtdq2ps.512: dword -> float; masked with default rounding (4) and unmasked with {rn-sae} (0).
; NOTE(review): stray listing numbers stripped; elided closing '}' restored so the IR parses.
declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}
; cvtpd2dq.512: double -> signed dword; masked default rounding plus unmasked {rn-sae}.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)

define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
; cvtpd2ps.512: double -> float; masked default rounding plus unmasked {ru-sae} (2).
; NOTE(review): stray listing numbers stripped; elided closing '}' restored so the IR parses.
declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}
; cvtpd2udq.512: double -> unsigned dword; masked {ru-sae} plus unmasked {rn-sae}.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)

define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
; cvtps2dq.512: float -> signed dword; masked {ru-sae} plus unmasked {rn-sae}.
; NOTE(review): stray listing numbers stripped; elided closing '}' restored so the IR parses.
declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)

define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
; cvtps2pd.512: float -> double; masked default rounding plus unmasked {sae} (8).
; NOTE(review): stray listing numbers stripped; elided closing '}' restored so the IR parses.
declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}
; cvtps2udq.512: float -> unsigned dword; masked {ru-sae} plus unmasked {rn-sae}.
; NOTE(review): stray listing numbers stripped; elided closing '}' restored so the IR parses.
declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)

define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
; cvttpd2dq.512: truncating double -> signed dword; masked default plus unmasked {sae}.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)

define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
; cvtudq2pd.512: unsigned dword -> double conversion, masked and unmasked results summed.
; NOTE(review): stray listing numbers stripped; elided closing '}' restored so the IR parses.
declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}
; cvtudq2ps.512: unsigned dword -> float; masked default rounding plus unmasked {rn-sae}.
; NOTE(review): stray listing numbers stripped; elided closing '}' restored so the IR parses.
declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}
; cvttpd2udq.512: truncating double -> unsigned dword; masked default plus unmasked {sae}.
; NOTE(review): stray listing numbers stripped; elided 'ret'/'}' restored so the IR parses.
declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)

define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
; cvttps2dq.512: truncating float -> signed dword; masked default plus unmasked {sae}.
; NOTE(review): stray listing numbers stripped; elided closing '}' restored so the IR parses.
declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)

define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
  %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
5462 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
5464 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
5465 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
5467 ; CHECK-NEXT: kmovw %edi, %k1
5468 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
5469 ; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
5470 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
5472 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
5473 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
5474 %res2 = add <16 x i32> %res, %res1
5475 ret <16 x i32> %res2
; --- Scalar scalef/getexp intrinsics: the 1-bit scalar mask is materialized
; --- with "andl $1" + kmovw; rounding arg 4 = current direction, 8 selects SAE.
5479 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
; Masked vscalefss (current rounding) plus unmasked {rn-sae} variant; results summed.
5480 define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
5481 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss:
5483 ; CHECK-NEXT: andl $1, %edi
5484 ; CHECK-NEXT: kmovw %edi, %k1
5485 ; CHECK-NEXT: vscalefss %xmm1, %xmm0, %xmm2 {%k1}
5486 ; CHECK-NEXT: vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
5487 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
5489 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
5490 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
5491 %res2 = fadd <4 x float> %res, %res1
5492 ret <4 x float> %res2
5495 declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
; Double-precision counterpart of the scalef_ss test above.
5496 define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
5497 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd:
5499 ; CHECK-NEXT: andl $1, %edi
5500 ; CHECK-NEXT: kmovw %edi, %k1
5501 ; CHECK-NEXT: vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
5502 ; CHECK-NEXT: vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
5503 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
5505 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
5506 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
5507 %res2 = fadd <2 x double> %res, %res1
5508 ret <2 x double> %res2
5511 declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
; vgetexpss in four flavors: masked, masked {sae}, zero-masked {sae}, unmasked {sae}.
5513 define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
5514 ; CHECK-LABEL: test_getexp_ss:
5516 ; CHECK-NEXT: andl $1, %edi
5517 ; CHECK-NEXT: kmovw %edi, %k1
5518 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
5519 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
5520 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
5521 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
5522 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
5523 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
5524 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
5525 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
5527 %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
5528 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
5529 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
5530 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
5532 %res.1 = fadd <4 x float> %res0, %res1
5533 %res.2 = fadd <4 x float> %res2, %res3
5534 %res = fadd <4 x float> %res.1, %res.2
5535 ret <4 x float> %res
5538 declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
; Double-precision counterpart; note %res3 uses rounding arg 4 (no SAE), hence the
; plain vgetexpsd into %xmm4 in the expected assembly.
5540 define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
5541 ; CHECK-LABEL: test_getexp_sd:
5543 ; CHECK-NEXT: andl $1, %edi
5544 ; CHECK-NEXT: kmovw %edi, %k1
5545 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
5546 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
5547 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
5548 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
5549 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
5550 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
5551 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
5552 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
5554 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
5555 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
5556 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
5557 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
5559 %res.1 = fadd <2 x double> %res0, %res1
5560 %res.2 = fadd <2 x double> %res2, %res3
5561 %res = fadd <2 x double> %res.1, %res.2
5562 ret <2 x double> %res
; --- Scalar compare intrinsics: the k-register result is moved to %eax and the
; --- low bit is sign-extended to a full i8 via shlb/sarb.
5565 declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
; Single masked vcmpnltsd with {sae} (predicate imm 5, rounding arg 8).
5567 define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
5568 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
5570 ; CHECK-NEXT: andl $1, %edi
5571 ; CHECK-NEXT: kmovw %edi, %k1
5572 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
5573 ; CHECK-NEXT: kmovw %k0, %eax
5574 ; CHECK-NEXT: shlb $7, %al
5575 ; CHECK-NEXT: sarb $7, %al
5578 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
; Four cmp.sd variants (predicates 2,3,4,5; mixed mask/no-mask and SAE/no-SAE),
; combined with OR so each compare is kept live.
5582 define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
5583 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
5585 ; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0
5586 ; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1
5587 ; CHECK-NEXT: korw %k0, %k1, %k0
5588 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1
5589 ; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2
5590 ; CHECK-NEXT: korw %k1, %k2, %k1
5591 ; CHECK-NEXT: andl $1, %edi
5592 ; CHECK-NEXT: kmovw %edi, %k2
5593 ; CHECK-NEXT: kandw %k2, %k1, %k1
5594 ; CHECK-NEXT: korw %k1, %k0, %k0
5595 ; CHECK-NEXT: kmovw %k0, %eax
5596 ; CHECK-NEXT: shlb $7, %al
5597 ; CHECK-NEXT: sarb $7, %al
5600 %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
5601 %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
5602 %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
5603 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
5605 %res11 = or i8 %res1, %res2
5606 %res12 = or i8 %res3, %res4
5607 %res13 = or i8 %res11, %res12
5611 declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
; Single masked vcmpunordss (predicate imm 3, current rounding).
5613 define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
5614 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
5616 ; CHECK-NEXT: andl $1, %edi
5617 ; CHECK-NEXT: kmovw %edi, %k1
5618 ; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
5619 ; CHECK-NEXT: kmovw %k0, %eax
5620 ; CHECK-NEXT: shlb $7, %al
5621 ; CHECK-NEXT: sarb $7, %al
5624 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
; Same four-predicate pattern as cmp_sd_all, but the results are combined with AND.
5629 define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
5630 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
5632 ; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1
5633 ; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1}
5634 ; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k1
5635 ; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
5636 ; CHECK-NEXT: andl $1, %edi
5637 ; CHECK-NEXT: kmovw %edi, %k2
5638 ; CHECK-NEXT: kandw %k2, %k1, %k1
5639 ; CHECK-NEXT: kandw %k1, %k0, %k0
5640 ; CHECK-NEXT: kmovw %k0, %eax
5641 ; CHECK-NEXT: shlb $7, %al
5642 ; CHECK-NEXT: sarb $7, %al
5644 %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
5645 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
5646 %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
5647 %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
5649 %res11 = and i8 %res1, %res2
5650 %res12 = and i8 %res3, %res4
5651 %res13 = and i8 %res11, %res12
; --- 128-bit-lane shuffle intrinsics (imm 22) in masked, unmasked, and (for the
; --- 64x2 forms) zero-masked variants; results are summed to keep all live.
5655 declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
5657 define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
5658 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
5660 ; CHECK-NEXT: kmovw %edi, %k1
5661 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5662 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5663 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
5665 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
5666 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
5667 %res2 = fadd <16 x float> %res, %res1
5668 ret <16 x float> %res2
5671 declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
; Adds a third, zero-masked call ({k1}{z}) on top of the masked/unmasked pair.
5673 define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
5674 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
5676 ; CHECK-NEXT: movzbl %dil, %eax
5677 ; CHECK-NEXT: kmovw %eax, %k1
5678 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5679 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5680 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5681 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
5682 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
5684 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
5685 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
5686 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
5688 %res3 = fadd <8 x double> %res, %res1
5689 %res4 = fadd <8 x double> %res3, %res2
5690 ret <8 x double> %res4
5693 declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
; Integer counterpart of shuf_f32x4 (vshufi32x4 / vpaddd).
5695 define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
5696 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
5698 ; CHECK-NEXT: kmovw %edi, %k1
5699 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5700 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
5701 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
5703 %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
5704 %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
5705 %res2 = add <16 x i32> %res, %res1
5706 ret <16 x i32> %res2
5709 declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
; Integer counterpart of shuf_f64x2 (vshufi64x2 / vpaddq), masked + unmasked only.
5711 define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
5712 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
5714 ; CHECK-NEXT: movzbl %dil, %eax
5715 ; CHECK-NEXT: kmovw %eax, %k1
5716 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5717 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
5718 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
5720 %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
5721 %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
5722 %res2 = add <8 x i64> %res, %res1
; --- getmant intrinsics (mantissa-extraction, imm 11): vector pd/ps then scalar sd/ss.
5726 declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
; Masked vgetmantpd plus unmasked {sae} variant; results summed.
5728 define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
5729 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512:
5731 ; CHECK-NEXT: movzbl %dil, %eax
5732 ; CHECK-NEXT: kmovw %eax, %k1
5733 ; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1}
5734 ; CHECK-NEXT: vgetmantpd $11,{sae}, %zmm0, %zmm0
5735 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5737 %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
5738 %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
5739 %res2 = fadd <8 x double> %res, %res1
5740 ret <8 x double> %res2
5743 declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
; Single-precision counterpart of getmant_pd_512.
5745 define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
5746 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512:
5748 ; CHECK-NEXT: kmovw %edi, %k1
5749 ; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1}
5750 ; CHECK-NEXT: vgetmantps $11,{sae}, %zmm0, %zmm0
5751 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5753 %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
5754 %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
5755 %res2 = fadd <16 x float> %res, %res1
5756 ret <16 x float> %res2
5759 declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)
; Scalar vgetmantsd in four flavors: masked, zero-masked, unmasked, and masked {sae}.
5761 define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
5762 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
5764 ; CHECK-NEXT: andl $1, %edi
5765 ; CHECK-NEXT: kmovw %edi, %k1
5766 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
5767 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
5768 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
5769 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
5770 ; CHECK-NEXT: vgetmantsd $11,{sae}, %xmm1, %xmm0, %xmm2 {%k1}
5771 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
5772 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
5773 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
5775 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
5776 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
5777 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8)
5778 %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4)
5779 %res11 = fadd <2 x double> %res, %res1
5780 %res12 = fadd <2 x double> %res2, %res3
5781 %res13 = fadd <2 x double> %res11, %res12
5782 ret <2 x double> %res13
5785 declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)
; Scalar vgetmantss: masked, zero-masked, and two unmasked ({sae} and current-rounding) calls.
5787 define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
5788 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
5790 ; CHECK-NEXT: andl $1, %edi
5791 ; CHECK-NEXT: kmovw %edi, %k1
5792 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
5793 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
5794 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4
5795 ; CHECK-NEXT: vgetmantss $11,{sae}, %xmm1, %xmm0, %xmm0
5796 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
5797 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
5798 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
5800 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
5801 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
5802 %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8)
5803 %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4)
5804 %res11 = fadd <4 x float> %res, %res1
5805 %res12 = fadd <4 x float> %res2, %res3
5806 %res13 = fadd <4 x float> %res11, %res12
5807 ret <4 x float> %res13
; --- Element shuffles (vshufpd/vshufps, imm 22) and immediate vpermilpd/ps (imm 22).
; NOTE(review): several shuffle-decoder comments below print "k1[...]" as an operand
; name — that is the autogenerated decoder output for this llc revision, kept verbatim.
5810 declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
; vshufpd with masked, unmasked, and zero-masked calls; all three summed.
5812 define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
5813 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
5815 ; CHECK-NEXT: movzbl %dil, %eax
5816 ; CHECK-NEXT: kmovw %eax, %k1
5817 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 = zmm2[0],k1[1],zmm2[3],k1[2],zmm2[5],k1[4],zmm2[6],k1[6]
5818 ; CHECK-NEXT: vshufpd {{.*#+}} zmm3 = k1[0],zmm0[1],k1[3],zmm0[2],k1[5],zmm0[4],k1[6],zmm0[6]
5819 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
5820 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
5821 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
5823 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
5824 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
5825 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
5827 %res3 = fadd <8 x double> %res, %res1
5828 %res4 = fadd <8 x double> %res3, %res2
5829 ret <8 x double> %res4
5832 declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
; vshufps with masked and unmasked calls; results summed.
5834 define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
5835 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
5837 ; CHECK-NEXT: kmovw %edi, %k1
5838 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm2[2,1],k1[1,0],zmm2[6,5],k1[5,4],zmm2[10,9],k1[9,8],zmm2[14,13],k1[13,12]
5839 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
5840 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
5842 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
5843 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
5844 %res2 = fadd <16 x float> %res, %res1
5845 ret <16 x float> %res2
5848 declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)
; Immediate vpermilpd: masked, zero-masked, and unmasked calls summed.
5850 define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
5851 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
5853 ; CHECK-NEXT: movzbl %dil, %eax
5854 ; CHECK-NEXT: kmovw %eax, %k1
5855 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 = zmm1[0,1,3,2,5,4,6,6]
5856 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = k1[0,1,3,2,5,4,6,6]
5857 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,3,2,5,4,6,6]
5858 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
5859 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5861 %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
5862 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
5863 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
5864 %res3 = fadd <8 x double> %res, %res1
5865 %res4 = fadd <8 x double> %res3, %res2
5866 ret <8 x double> %res4
5869 declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)
; Single-precision counterpart of vpermil_pd_512.
5871 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
5872 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
5874 ; CHECK-NEXT: kmovw %edi, %k1
5875 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5876 ; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = k1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5877 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5878 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
5879 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5881 %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
5882 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
5883 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
5884 %res3 = fadd <16 x float> %res, %res1
5885 %res4 = fadd <16 x float> %res3, %res2
5886 ret <16 x float> %res4
; --- Variable vpermilvar (vector control operand) and 128-bit insertf32x4/inserti32x4.
5889 declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
; Variable vpermilpd: masked, zero-masked, and unmasked calls summed.
5891 define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
5892 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
5894 ; CHECK-NEXT: movzbl %dil, %eax
5895 ; CHECK-NEXT: kmovw %eax, %k1
5896 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
5897 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
5898 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
5899 ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
5900 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
5902 %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
5903 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
5904 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
5905 %res3 = fadd <8 x double> %res, %res1
5906 %res4 = fadd <8 x double> %res2, %res3
5907 ret <8 x double> %res4
5910 declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
; Single-precision counterpart of vpermilvar_pd_512.
5912 define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
5913 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
5915 ; CHECK-NEXT: kmovw %edi, %k1
5916 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
5917 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
5918 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
5919 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
5920 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
5922 %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
5923 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
5924 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
5925 %res3 = fadd <16 x float> %res, %res1
5926 %res4 = fadd <16 x float> %res2, %res3
5927 ret <16 x float> %res4
5930 declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8)
; vinsertf32x4 $1: masked, unmasked, and zero-masked calls summed.
5932 define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) {
5933 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
5935 ; CHECK-NEXT: kmovw %edi, %k1
5936 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
5937 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
5938 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
5939 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
5940 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
5942 %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 %x4)
5943 %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 -1)
5944 %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i8 %x4)
5945 %res3 = fadd <16 x float> %res, %res1
5946 %res4 = fadd <16 x float> %res2, %res3
5947 ret <16 x float> %res4
5950 declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i8)
; Integer counterpart (vinserti32x4 / vpaddd).
5952 define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i8 %x4) {
5953 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
5955 ; CHECK-NEXT: kmovw %edi, %k1
5956 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
5957 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
5958 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
5959 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
5960 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
5962 %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 %x4)
5963 %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 -1)
5964 %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i8 %x4)
5965 %res3 = add <16 x i32> %res, %res1
5966 %res4 = add <16 x i32> %res2, %res3
5967 ret <16 x i32> %res4
; --- 256-bit insert intrinsics and scalar ss<->sd conversion-with-rounding intrinsics.
5970 declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
; vinsertf64x4 $1: masked, unmasked, and zero-masked calls summed.
5972 define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
5973 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
5975 ; CHECK-NEXT: movzbl %dil, %eax
5976 ; CHECK-NEXT: kmovw %eax, %k1
5977 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
5978 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
5979 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
5980 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
5981 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
5983 %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
5984 %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
5985 %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
5986 %res3 = fadd <8 x double> %res, %res1
5987 %res4 = fadd <8 x double> %res2, %res3
5988 ret <8 x double> %res4
5991 declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
; Integer counterpart (vinserti64x4 / vpaddq).
5993 define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
5994 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
5996 ; CHECK-NEXT: movzbl %dil, %eax
5997 ; CHECK-NEXT: kmovw %eax, %k1
5998 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
5999 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
6000 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
6001 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
6002 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
6004 %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
6005 %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
6006 %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
6007 %res3 = add <8 x i64> %res, %res1
6008 %res4 = add <8 x i64> %res2, %res3
6012 declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float>, <4 x float>, <2 x double>, i8, i32)
; Masked vcvtss2sd (current rounding) plus unmasked {sae} variant; results summed.
6014 define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<4 x float> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
6015 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
6017 ; CHECK-NEXT: andl $1, %edi
6018 ; CHECK-NEXT: kmovw %edi, %k1
6019 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
6020 ; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
6021 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
6023 %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
6024 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
6025 %res2 = fadd <2 x double> %res, %res1
6026 ret <2 x double> %res2
6029 declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double>, <2 x double>, <4 x float>, i8, i32)
; Masked vcvtsd2ss with {rz-sae} (rounding arg 3) plus unmasked {rn-sae} (arg 8) variant.
6031 define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
6032 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
6034 ; CHECK-NEXT: andl $1, %edi
6035 ; CHECK-NEXT: kmovw %edi, %k1
6036 ; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
6037 ; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
6038 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
6040 %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
6041 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
6042 %res2 = fadd <4 x float> %res, %res1
6043 ret <4 x float> %res2
6046 declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
6048 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
6049 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
6051 ; CHECK-NEXT: kmovw %edi, %k1
6052 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6053 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
6054 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
6055 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
6057 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
6058 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
6059 %res2 = add <16 x i32> %res, %res1
6060 ret <16 x i32> %res2
6063 declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
; Zero-masked variant of the test above: same VPTERNLOGD imm8 = 33, but the
; masked call must lower with the {z} (zeroing) qualifier.
6065 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
6066 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
6068 ; CHECK-NEXT: kmovw %edi, %k1
6069 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6070 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
6071 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
6072 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
6074 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
6075 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
6076 %res2 = add <16 x i32> %res, %res1
6077 ret <16 x i32> %res2
6080 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
; 64-bit-element (VPTERNLOGQ) version: the i8 mask arrives in %dil and is
; zero-extended (movzbl) before being moved into %k1.
6082 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
6083 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
6085 ; CHECK-NEXT: movzbl %dil, %eax
6086 ; CHECK-NEXT: kmovw %eax, %k1
6087 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6088 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
6089 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
6090 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
6092 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
6093 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
6094 %res2 = add <8 x i64> %res, %res1
6098 declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
; Zero-masked VPTERNLOGQ: same as the mask_ variant but the masked call must
; carry the {z} qualifier.
6100 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
6101 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
6103 ; CHECK-NEXT: movzbl %dil, %eax
6104 ; CHECK-NEXT: kmovw %eax, %k1
6105 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
6106 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
6107 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
6108 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
6110 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
6111 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
6112 %res2 = add <8 x i64> %res, %res1
6116 declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)
; VMOVSLDUP (duplicate even-index floats): exercises merge-masked (%x2 into
; %x1), unmasked (-1), and zero-masked (zeroinitializer passthrough) forms;
; all three results are summed.
6118 define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
6119 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512:
6121 ; CHECK-NEXT: kmovw %edi, %k1
6122 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
6123 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
6124 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
6125 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
6126 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
6128 %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
6129 %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
6130 %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
6131 %res3 = fadd <16 x float> %res, %res1
6132 %res4 = fadd <16 x float> %res2, %res3
6133 ret <16 x float> %res4
6136 declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)
; VMOVSHDUP (duplicate odd-index floats): merge-masked, unmasked, and
; zero-masked calls, summed — mirrors the movsldup test above.
6138 define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
6139 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512:
6141 ; CHECK-NEXT: kmovw %edi, %k1
6142 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
6143 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
6144 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
6145 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
6146 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
6148 %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
6149 %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
6150 %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
6151 %res3 = fadd <16 x float> %res, %res1
6152 %res4 = fadd <16 x float> %res2, %res3
6153 ret <16 x float> %res4
6156 declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)
; VMOVDDUP (duplicate even-index doubles): merge-masked, unmasked, and
; zero-masked calls, summed. The i8 mask is zero-extended via movzbl first.
6158 define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
6159 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_512:
6161 ; CHECK-NEXT: movzbl %dil, %eax
6162 ; CHECK-NEXT: kmovw %eax, %k1
6163 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6]
6164 ; CHECK-NEXT: vmovddup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6]
6165 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
6166 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
6167 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
6169 %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
6170 %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
6171 %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
6172 %res3 = fadd <8 x double> %res, %res1
6173 %res4 = fadd <8 x double> %res2, %res3
6174 ret <8 x double> %res4
; vcomi.sd with predicate 0 (eq) and rounding arg 8: must lower to vcomisd
; with the {sae} qualifier, then sete/movzbl to materialize the i32 result.
6177 define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
6178 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
6180 ; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
6181 ; CHECK-NEXT: sete %al
6182 ; CHECK-NEXT: movzbl %al, %eax
6184 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
; Same as the comi eq+sae test, but predicate 8 selects the unordered
; (vucomisd) form.
6188 define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
6189 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
6191 ; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
6192 ; CHECK-NEXT: sete %al
6193 ; CHECK-NEXT: movzbl %al, %eax
6195 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
; vcomi.sd, predicate 0 (eq), rounding arg 4 (current rounding): no {sae}
; qualifier expected on the vcomisd.
6199 define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
6200 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
6202 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
6203 ; CHECK-NEXT: sete %al
6204 ; CHECK-NEXT: movzbl %al, %eax
6206 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
; Unordered eq, current rounding: vucomisd without {sae}.
6210 define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
6211 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
6213 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
6214 ; CHECK-NEXT: sete %al
6215 ; CHECK-NEXT: movzbl %al, %eax
6217 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
; Predicate 1 (lt) + sae: the lt result comes from CF, materialized with the
; sbbl/andl idiom rather than sete.
6221 define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
6222 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
6224 ; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
6225 ; CHECK-NEXT: sbbl %eax, %eax
6226 ; CHECK-NEXT: andl $1, %eax
6228 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
; Unordered lt (predicate 9) + sae: vucomisd {sae} with the sbbl/andl idiom.
6232 define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
6233 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
6235 ; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
6236 ; CHECK-NEXT: sbbl %eax, %eax
6237 ; CHECK-NEXT: andl $1, %eax
6239 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
; Ordered lt (predicate 1), current rounding (arg 4): no {sae}.
6243 define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
6244 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
6246 ; CHECK-NEXT: vcomisd %xmm1, %xmm0
6247 ; CHECK-NEXT: sbbl %eax, %eax
6248 ; CHECK-NEXT: andl $1, %eax
6250 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
; Unordered lt (predicate 9), current rounding: vucomisd, sbbl/andl.
6254 define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
6255 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
6257 ; CHECK-NEXT: vucomisd %xmm1, %xmm0
6258 ; CHECK-NEXT: sbbl %eax, %eax
6259 ; CHECK-NEXT: andl $1, %eax
6261 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
6265 declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
; Single-precision counterpart: vcomi.ss with predicate 9 / rounding 4 must
; lower to vucomiss and the sbbl/andl idiom.
6267 define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
6268 ; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
6270 ; CHECK-NEXT: vucomiss %xmm1, %xmm0
6271 ; CHECK-NEXT: sbbl %eax, %eax
6272 ; CHECK-NEXT: andl $1, %eax
6274 %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
6278 declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
6279 declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)
; Masked register-register VMOVSS with an explicit passthrough (%x2): only
; mask bit 0 matters, hence the andl $1 before kmovw; result is merged into
; %xmm2 and copied back to the return register.
6281 define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
6282 ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrk:
6284 ; CHECK-NEXT: andl $1, %edi
6285 ; CHECK-NEXT: kmovw %edi, %k1
6286 ; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
6287 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
6289 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
6290 ret <4 x float> %res
; Zero-masked VMOVSS (zeroinitializer passthrough): expects the {z}
; qualifier on the masked move.
6293 define <4 x float>@test_int_x86_avx512_mask_move_ss_rrkz(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
6294 ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrkz:
6296 ; CHECK-NEXT: andl $1, %edi
6297 ; CHECK-NEXT: kmovw %edi, %k1
6298 ; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
6300 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x2)
6301 ret <4 x float> %res
; All-ones mask: the masked move folds to a plain unmasked vmovss.
6304 define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
6305 ; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rr:
6307 ; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0
6309 %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 -1)
6310 ret <4 x float> %res
6313 declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)
; Double-precision move with an all-ones mask: folds to plain vmovsd.
6314 define <2 x double>@test_int_x86_avx512_mask_move_sd_rr(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
6315 ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rr:
6317 ; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0
6319 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 -1)
6320 ret <2 x double> %res
; Zero-masked VMOVSD: andl $1 isolates mask bit 0; {z} qualifier expected.
6323 define <2 x double>@test_int_x86_avx512_mask_move_sd_rrkz(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
6324 ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrkz:
6326 ; CHECK-NEXT: andl $1, %edi
6327 ; CHECK-NEXT: kmovw %edi, %k1
6328 ; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
6330 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 %x2)
6331 ret <2 x double> %res
; Merge-masked VMOVSD with explicit passthrough %x2, mirroring the ss rrk
; test above.
6334 define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
6335 ; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrk:
6337 ; CHECK-NEXT: andl $1, %edi
6338 ; CHECK-NEXT: kmovw %edi, %k1
6339 ; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
6340 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
6342 %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
6343 ret <2 x double> %res
6346 declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)
; 128-bit float lane broadcast, lowered as vshuff32x4 $0. Checks unmasked,
; merge-masked, and zero-masked forms (results summed). Note: plain CHECK
; lines (not CHECK-NEXT) — these assertions are intentionally order-relaxed.
6348 define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
6349 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
6350 ; CHECK: kmovw %edi, %k1
6351 ; CHECK: vshuff32x4 $0, %zmm0, %zmm0, %zmm2 {%k1} {z}
6352 ; CHECK: vshuff32x4 $0, %zmm0, %zmm0, %zmm1 {%k1}
6353 ; CHECK: vshuff32x4 $0, %zmm0, %zmm0, %zmm0
6354 ; CHECK: vaddps %zmm1, %zmm0, %zmm0
6355 ; CHECK: vaddps %zmm0, %zmm2, %zmm0
6357 %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
6358 %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
6359 %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
6360 %res4 = fadd <16 x float> %res1, %res2
6361 %res5 = fadd <16 x float> %res3, %res4
6362 ret <16 x float> %res5
6365 declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)
; 256-bit double lane broadcast via vshuff64x2 $68: unmasked, merge-masked,
; and zero-masked forms, summed. Plain CHECK lines (order-relaxed).
6367 define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
6368 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
6369 ; CHECK: kmovw %eax, %k1
6370 ; CHECK: vshuff64x2 $68, %zmm0, %zmm0, %zmm2 {%k1} {z}
6371 ; CHECK: vshuff64x2 $68, %zmm0, %zmm0, %zmm1 {%k1}
6372 ; CHECK: vshuff64x2 $68, %zmm0, %zmm0, %zmm0
6373 ; CHECK: vaddpd %zmm1, %zmm0, %zmm0
6374 ; CHECK: vaddpd %zmm0, %zmm2, %zmm0
6376 %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
6377 %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
6378 %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
6379 %res4 = fadd <8 x double> %res1, %res2
6380 %res5 = fadd <8 x double> %res3, %res4
6381 ret <8 x double> %res5
6384 declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)
; Integer counterpart of broadcastf32x4: vshufi32x4 $0 with unmasked,
; merge-masked, and zero-masked forms, summed with vpaddd.
6386 define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
6387 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
6388 ; CHECK: kmovw %edi, %k1
6389 ; CHECK: vshufi32x4 $0, %zmm0, %zmm0, %zmm2 {%k1} {z}
6390 ; CHECK: vshufi32x4 $0, %zmm0, %zmm0, %zmm1 {%k1}
6391 ; CHECK: vshufi32x4 $0, %zmm0, %zmm0, %zmm0
6392 ; CHECK: vpaddd %zmm1, %zmm0, %zmm0
6393 ; CHECK: vpaddd %zmm0, %zmm2, %zmm0
6395 %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
6396 %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
6397 %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
6398 %res4 = add <16 x i32> %res1, %res2
6399 %res5 = add <16 x i32> %res3, %res4
6400 ret <16 x i32> %res5
6403 declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)
; Integer counterpart of broadcastf64x4: vshufi64x2 $68 with unmasked,
; merge-masked, and zero-masked forms, summed with vpaddq.
6405 define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
6406 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
6407 ; CHECK: kmovw %eax, %k1
6408 ; CHECK: vshufi64x2 $68, %zmm0, %zmm0, %zmm2 {%k1} {z}
6409 ; CHECK: vshufi64x2 $68, %zmm0, %zmm0, %zmm1 {%k1}
6410 ; CHECK: vshufi64x2 $68, %zmm0, %zmm0, %zmm0
6411 ; CHECK: vpaddq %zmm1, %zmm0, %zmm0
6412 ; CHECK: vpaddq %zmm0, %zmm2, %zmm0
6414 %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
6415 %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
6416 %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
6417 %res4 = add <8 x i64> %res1, %res2
6418 %res5 = add <8 x i64> %res3, %res4
6422 declare <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64>, i8, <8 x i64>, i8)
; Immediate logical right shift of qwords (vpsrlq $255): merge-masked,
; zero-masked, and unmasked forms, summed. i8 mask zero-extended via movzbl.
6424 define <8 x i64>@test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i8 %x1, <8 x i64> %x2, i8 %x3) {
6425 ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
6427 ; CHECK-NEXT: movzbl %sil, %eax
6428 ; CHECK-NEXT: kmovw %eax, %k1
6429 ; CHECK-NEXT: vpsrlq $255, %zmm0, %zmm1 {%k1}
6430 ; CHECK-NEXT: vpsrlq $255, %zmm0, %zmm2 {%k1} {z}
6431 ; CHECK-NEXT: vpsrlq $255, %zmm0, %zmm0
6432 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6433 ; CHECK-NEXT: vpaddq %zmm2, %zmm0, %zmm0
6435 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i8 255, <8 x i64> %x2, i8 %x3)
6436 %res1 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i8 255, <8 x i64> %x2, i8 -1)
6437 %res2 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i8 255, <8 x i64> zeroinitializer, i8 %x3)
6438 %res3 = add <8 x i64> %res, %res1
6439 %res4 = add <8 x i64> %res3, %res2
6443 declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i8, <16 x i32>, i16)
; Immediate logical right shift of dwords (vpsrld $255): merge-masked,
; zero-masked, and unmasked forms, summed.
6445 define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i8 %x1, <16 x i32> %x2, i16 %x3) {
6446 ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_512:
6448 ; CHECK-NEXT: kmovw %esi, %k1
6449 ; CHECK-NEXT: vpsrld $255, %zmm0, %zmm1 {%k1}
6450 ; CHECK-NEXT: vpsrld $255, %zmm0, %zmm2 {%k1} {z}
6451 ; CHECK-NEXT: vpsrld $255, %zmm0, %zmm0
6452 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6453 ; CHECK-NEXT: vpaddd %zmm2, %zmm0, %zmm0
6455 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i8 255, <16 x i32> %x2, i16 %x3)
6456 %res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i8 255, <16 x i32> %x2, i16 -1)
6457 %res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i8 255, <16 x i32> zeroinitializer, i16 %x3)
6458 %res3 = add <16 x i32> %res, %res1
6459 %res4 = add <16 x i32> %res3, %res2
6460 ret <16 x i32> %res4
6463 declare <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32>, i8, <16 x i32>, i16)
; Immediate arithmetic right shift of dwords (vpsrad $3): merge-masked,
; zero-masked, and unmasked forms, summed.
6465 define <16 x i32>@test_int_x86_avx512_mask_psra_di_512(<16 x i32> %x0, i8 %x1, <16 x i32> %x2, i16 %x3) {
6466 ; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_512:
6468 ; CHECK-NEXT: kmovw %esi, %k1
6469 ; CHECK-NEXT: vpsrad $3, %zmm0, %zmm1 {%k1}
6470 ; CHECK-NEXT: vpsrad $3, %zmm0, %zmm2 {%k1} {z}
6471 ; CHECK-NEXT: vpsrad $3, %zmm0, %zmm0
6472 ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
6473 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6475 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 %x3)
6476 %res1 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i8 3, <16 x i32> zeroinitializer, i16 %x3)
6477 %res2 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 -1)
6478 %res3 = add <16 x i32> %res, %res1
6479 %res4 = add <16 x i32> %res3, %res2
6480 ret <16 x i32> %res4
6483 declare <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64>, i8, <8 x i64>, i8)
; Immediate arithmetic right shift of qwords (vpsraq $3, AVX-512-only form):
; merge-masked, zero-masked, and unmasked, summed; i8 mask via movzbl.
6485 define <8 x i64>@test_int_x86_avx512_mask_psra_qi_512(<8 x i64> %x0, i8 %x1, <8 x i64> %x2, i8 %x3) {
6486 ; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_512:
6488 ; CHECK-NEXT: movzbl %sil, %eax
6489 ; CHECK-NEXT: kmovw %eax, %k1
6490 ; CHECK-NEXT: vpsraq $3, %zmm0, %zmm1 {%k1}
6491 ; CHECK-NEXT: vpsraq $3, %zmm0, %zmm2 {%k1} {z}
6492 ; CHECK-NEXT: vpsraq $3, %zmm0, %zmm0
6493 ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
6494 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6496 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 %x3)
6497 %res1 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i8 3, <8 x i64> zeroinitializer, i8 %x3)
6498 %res2 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 -1)
6499 %res3 = add <8 x i64> %res, %res1
6500 %res4 = add <8 x i64> %res3, %res2
6504 declare <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32>, i8, <16 x i32>, i16)
; Immediate left shift of dwords (vpslld $3): merge-masked, zero-masked,
; and unmasked forms, summed.
6506 define <16 x i32>@test_int_x86_avx512_mask_psll_di_512(<16 x i32> %x0, i8 %x1, <16 x i32> %x2, i16 %x3) {
6507 ; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_512:
6509 ; CHECK-NEXT: kmovw %esi, %k1
6510 ; CHECK-NEXT: vpslld $3, %zmm0, %zmm1 {%k1}
6511 ; CHECK-NEXT: vpslld $3, %zmm0, %zmm2 {%k1} {z}
6512 ; CHECK-NEXT: vpslld $3, %zmm0, %zmm0
6513 ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
6514 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6516 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 %x3)
6517 %res1 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i8 3, <16 x i32> zeroinitializer, i16 %x3)
6518 %res2 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 -1)
6519 %res3 = add <16 x i32> %res, %res1
6520 %res4 = add <16 x i32> %res3, %res2
6521 ret <16 x i32> %res4
6524 declare <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64>, i8, <8 x i64>, i8)
; Immediate left shift of qwords (vpsllq $3): merge-masked, zero-masked,
; and unmasked forms, summed; i8 mask via movzbl.
6526 define <8 x i64>@test_int_x86_avx512_mask_psll_qi_512(<8 x i64> %x0, i8 %x1, <8 x i64> %x2, i8 %x3) {
6527 ; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_512:
6529 ; CHECK-NEXT: movzbl %sil, %eax
6530 ; CHECK-NEXT: kmovw %eax, %k1
6531 ; CHECK-NEXT: vpsllq $3, %zmm0, %zmm1 {%k1}
6532 ; CHECK-NEXT: vpsllq $3, %zmm0, %zmm2 {%k1} {z}
6533 ; CHECK-NEXT: vpsllq $3, %zmm0, %zmm0
6534 ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
6535 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6537 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 %x3)
6538 %res1 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i8 3, <8 x i64> zeroinitializer, i8 %x3)
6539 %res2 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 -1)
6540 %res3 = add <8 x i64> %res, %res1
6541 %res4 = add <8 x i64> %res3, %res2
6545 declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i16, <16 x i32>, i8)
; Immediate dword shuffle (vpshufd $3): merge-masked, zero-masked, and
; unmasked forms, summed.
6547 define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i16 %x1, <16 x i32> %x2, i8 %x3) {
6548 ; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
6550 ; CHECK-NEXT: kmovw %esi, %k1
6551 ; CHECK-NEXT: vpshufd $3, %zmm0, %zmm1 {%k1}
6552 ; CHECK-NEXT: vpshufd $3, %zmm0, %zmm2 {%k1} {z}
6553 ; CHECK-NEXT: vpshufd $3, %zmm0, %zmm0
6554 ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
6555 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
6557 %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i16 3, <16 x i32> %x2, i8 %x3)
6558 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i16 3, <16 x i32> zeroinitializer, i8 %x3)
6559 %res2 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i16 3, <16 x i32> %x2, i8 -1)
6560 %res3 = add <16 x i32> %res, %res1
6561 %res4 = add <16 x i32> %res3, %res2
6562 ret <16 x i32> %res4