1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
; NOTE(review): this chunk looks like an extraction with the original file's line
; numbers baked into each line, and with `ret`/closing-`}` lines omitted from view —
; confirm against the pristine llvm/test file before modifying any code tokens.
; --- AVX-512 mask-register intrinsics: kortestz/kortestc/kand/knot/kunpckbw ---
3 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
4 ; CHECK-LABEL: test_kortestz
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
12 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
13 ; CHECK-LABEL: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
21 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
22 ; CHECK-LABEL: test_kand
25 define i16 @test_kand(i16 %a0, i16 %a1) {
; kand is called twice (once with constant 8) so the DAG keeps both uses live.
26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
31 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
32 ; CHECK-LABEL: test_knot
34 define i16 @test_knot(i16 %a0) {
35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
39 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
41 ; CHECK-LABEL: unpckbw_test
44 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
; --- vrcp14ps/vrcp14pd: packed 14-bit reciprocal approximation, all-ones mask ---
; The CHECK lines pin the exact EVEX encodings emitted by the KNL backend.
49 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
54 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
56 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
61 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
; --- vrndscalepd/vrndscaleps with immediate 11 (0x0b in the pinned encoding) ---
63 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
65 define <8 x double> @test7(<8 x double> %a) {
66 ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
; Trailing i32 4 is the rounding-mode operand (current-direction; no {sae} in CHECK).
67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
71 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
73 define <16 x float> @test8(<16 x float> %a) {
74 ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
; --- rsqrt14 (packed + scalar) and rcp14 scalar forms, unmasked ---
79 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
84 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
86 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
; --- masked packed sqrt; i32 4 = default rounding, i32 3 selects {rz-sae} (see CHECK) ---
100 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
101 ; CHECK-LABEL: test_sqrt_pd_512
103 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
104 ret <8 x double> %res
106 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
108 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
109 ; CHECK-LABEL: test_sqrt_ps_512
111 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
112 ret <16 x float> %res
114 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
115 ; CHECK-LABEL: test_sqrt_round_ps_512
116 ; CHECK: vsqrtps {rz-sae}
117 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
118 ret <16 x float> %res
120 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
; --- vgetexppd/vgetexpps; i32 8 selects {sae} per the CHECK lines below ---
122 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
123 ; CHECK-LABEL: test_getexp_pd_512
125 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
126 ret <8 x double> %res
128 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
129 ; CHECK-LABEL: test_getexp_round_pd_512
130 ; CHECK: vgetexppd {sae}
131 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
132 ret <8 x double> %res
134 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
136 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
137 ; CHECK-LABEL: test_getexp_ps_512
139 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
140 ret <16 x float> %res
143 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
144 ; CHECK-LABEL: test_getexp_round_ps_512
145 ; CHECK: vgetexpps {sae}
146 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
147 ret <16 x float> %res
149 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
; --- scalar vsqrtss/vsqrtsd: four calls cover merge-mask, {rd-sae}, zero-mask
; {ru-sae}, and unmasked {rz-sae}; results are summed so none is dead-code-eliminated.
151 declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
153 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
154 ; CHECK-LABEL: test_sqrt_ss:
156 ; CHECK-NEXT: andl $1, %edi
157 ; CHECK-NEXT: kmovw %edi, %k1
158 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
159 ; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
160 ; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
161 ; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
162 ; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
163 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
164 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
165 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
167 %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
168 %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
169 %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
170 %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)
172 %res.1 = fadd <4 x float> %res0, %res1
173 %res.2 = fadd <4 x float> %res2, %res3
174 %res = fadd <4 x float> %res.1, %res.2
; Double-precision twin of test_sqrt_ss, same four masking/rounding variants.
178 declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
180 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
181 ; CHECK-LABEL: test_sqrt_sd:
183 ; CHECK-NEXT: andl $1, %edi
184 ; CHECK-NEXT: kmovw %edi, %k1
185 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
186 ; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
187 ; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
188 ; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
189 ; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
190 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
191 ; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0
192 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
194 %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
195 %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
196 %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
197 %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)
199 %res.1 = fadd <2 x double> %res0, %res1
200 %res.2 = fadd <2 x double> %res2, %res3
201 %res = fadd <2 x double> %res.1, %res.2
202 ret <2 x double> %res
; --- scalar int<->fp conversions; EVEX-encoded forms are pinned via the 0x62
; encoding prefix. The truncating variants take an explicit rounding operand:
; i32 4 = default, i32 8 = {sae} (matching the CHECK text). Pairs of calls are
; added together so both rounding variants stay live.
205 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
206 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
207 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
210 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
212 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
213 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
214 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
215 ret <2 x double> %res
217 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
219 define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
220 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
221 ; CHECK: vcvttsd2si {sae}{{.*}}encoding: [0x62
222 %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
223 %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
224 %res2 = add i64 %res0, %res1
227 declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
229 define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
230 ; CHECK: vcvttsd2usi {{.*}}encoding: [0x62
231 ; CHECK: vcvttsd2usi {sae}{{.*}}encoding: [0x62
232 %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
233 %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
234 %res2 = add i32 %res0, %res1
237 declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
239 define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
240 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
241 ; CHECK: vcvttsd2si {sae}{{.*}}encoding: [0x62
242 %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
243 %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
244 %res2 = add i32 %res0, %res1
247 declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
251 define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
252 ; CHECK: vcvttsd2usi {{.*}}encoding: [0x62
253 ; CHECK: vcvttsd2usi {sae}{{.*}}encoding: [0x62
254 %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
255 %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
256 %res2 = add i64 %res0, %res1
259 declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
261 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
262 ; CHECK: vcvtss2si {{.*}}encoding: [0x62
263 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
266 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
269 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
270 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
271 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
274 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
; NOTE(review): the ss tests below check {sae} first, then the default form —
; the call order (i32 8 before i32 4) is deliberately swapped vs. the sd tests.
277 define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
278 ; CHECK: vcvttss2si {sae}{{.*}}encoding: [0x62
279 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
280 %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
281 %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
282 %res2 = add i32 %res0, %res1
285 declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
287 define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
288 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
289 ; CHECK: vcvttss2si {sae}{{.*}}encoding: [0x62
290 %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
291 %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
292 %res2 = add i64 %res0, %res1
295 declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
297 define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
298 ; CHECK: vcvttss2usi {sae}{{.*}}encoding: [0x62
299 ; CHECK: vcvttss2usi {{.*}}encoding: [0x62
300 %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
301 %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
302 %res2 = add i32 %res0, %res1
305 declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
307 define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
308 ; CHECK: vcvttss2usi {{.*}}encoding: [0x62
309 ; CHECK: vcvttss2usi {sae}{{.*}}encoding: [0x62
310 %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
311 %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
312 %res2 = add i64 %res0, %res1
315 declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
317 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
318 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
319 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
322 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
; --- half<->single conversions: vcvtph2ps variants cover unmasked, {sae},
; merge-masked, zero-masked; vcvtps2ph uses rounding immediate 2 ---
324 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
325 ; CHECK: test_x86_vcvtph2ps_512
326 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
327 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
328 ret <16 x float> %res
331 define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
332 ; CHECK: test_x86_vcvtph2ps_512_sae
333 ; CHECK: vcvtph2ps {sae}, %ymm0, %zmm0
334 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
335 ret <16 x float> %res
338 define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
339 ; CHECK: test_x86_vcvtph2ps_512_rrk
340 ; CHECK: vcvtph2ps %ymm0, %zmm1 {%k1}
341 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
342 ret <16 x float> %res
345 define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
346 ; CHECK: test_x86_vcvtph2ps_512_sae_rrkz
347 ; CHECK: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
348 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
349 ret <16 x float> %res
352 define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
353 ; CHECK: test_x86_vcvtph2ps_512_rrkz
354 ; CHECK: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
355 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
356 ret <16 x float> %res
359 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
362 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
363 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
364 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
368 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
; --- vbroadcastss/sd: memory-source (i8*) and register-source (vector) forms ---
370 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
371 ; CHECK: vbroadcastss
372 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
373 ret <16 x float> %res
375 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
377 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
378 ; CHECK: vbroadcastsd
379 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
380 ret <8 x double> %res
382 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
384 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
385 ; CHECK: vbroadcastss
386 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
387 ret <16 x float> %res
389 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
391 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
392 ; CHECK: vbroadcastsd
393 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
394 ret <8 x double> %res
396 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
; --- vpbroadcastd/q: one call each for unmasked (-1), merge-masked, and
; zero-masked; the three results are summed to keep all variants live ---
398 define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
399 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
401 ; CHECK-NEXT: kmovw %edi, %k1
402 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm1 {%k1}
403 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm2 {%k1} {z}
404 ; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
405 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
406 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
408 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
409 %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
410 %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
411 %res3 = add <16 x i32> %res, %res1
412 %res4 = add <16 x i32> %res2, %res3
415 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)
417 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
418 ; CHECK: vpbroadcastd
419 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
422 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
424 define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
425 ; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
; The i8 mask is zero-extended (movzbl) before moving into k1 (see CHECK below).
427 ; CHECK-NEXT: movzbl %dil, %eax
428 ; CHECK-NEXT: kmovw %eax, %k1
429 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm1 {%k1}
430 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm2 {%k1} {z}
431 ; CHECK-NEXT: vpbroadcastq %xmm0, %zmm0
432 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
433 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
435 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
436 %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
437 %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
438 %res3 = add <8 x i64> %res, %res1
439 %res4 = add <8 x i64> %res2, %res3
442 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)
444 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
445 ; CHECK: vpbroadcastq
446 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
449 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
; --- CDI intrinsics: vpconflictd/q and vplzcntd/q, unmasked / zero-masked /
; merge-masked variants ---
451 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
452 ; CHECK-LABEL: test_conflict_d:
454 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0
455 ; CHECK-NEXT: retq ## encoding: [0xc3]
456 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
460 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
462 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
463 ; CHECK-LABEL: test_conflict_q:
465 ; CHECK-NEXT: vpconflictq %zmm0, %zmm0
467 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
471 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
473 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
474 ; CHECK-LABEL: test_maskz_conflict_d:
476 ; CHECK-NEXT: kmovw %edi, %k1
477 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
479 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
483 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
484 ; CHECK-LABEL: test_mask_conflict_q:
486 ; CHECK-NEXT: movzbl %dil, %eax
487 ; CHECK-NEXT: kmovw %eax, %k1
488 ; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
489 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
491 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
495 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
496 ; CHECK-LABEL: test_lzcnt_d:
498 ; CHECK-NEXT: vplzcntd %zmm0, %zmm0
500 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
504 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
506 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
507 ; CHECK-LABEL: test_lzcnt_q:
509 ; CHECK-NEXT: vplzcntq %zmm0, %zmm0
511 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
515 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
518 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
519 ; CHECK-LABEL: test_mask_lzcnt_d:
521 ; CHECK-NEXT: kmovw %edi, %k1
522 ; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1}
523 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
525 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
529 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
530 ; CHECK-LABEL: test_mask_lzcnt_q:
532 ; CHECK-NEXT: movzbl %dil, %eax
533 ; CHECK-NEXT: kmovw %eax, %k1
534 ; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1}
535 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
536 ; CHECK-NEXT: retq ## encoding: [0xc3]
537 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
; --- masked blend intrinsics (vblendmps/pd and integer d/q forms),
; including one memory-operand variant ---
541 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
542 ; CHECK: vblendmps %zmm1, %zmm0
543 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
544 ret <16 x float> %res
547 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
549 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
550 ; CHECK: vblendmpd %zmm1, %zmm0
551 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
552 ret <8 x double> %res
555 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
556 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
557 ; CHECK: vblendmpd (%
; Second blend operand comes from memory so the load folds into vblendmpd.
558 %b = load <8 x double>, <8 x double>* %ptr
559 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
560 ret <8 x double> %res
562 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
564 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
566 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
569 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
571 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
573 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
576 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
; --- packed FP compares into mask registers; predicate is the i32 immediate
; (2 = LE, 4 = NEQ per the CHECK mnemonics); last arg 8 = {sae}, 4 = default ---
578 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
579 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
580 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
583 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
585 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
586 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
587 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
590 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
; --- vmaxpd/vminpd, unmasked (-1) with default rounding (i32 4) ---
593 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
595 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
596 <8 x double>zeroinitializer, i8 -1, i32 4)
597 ret <8 x double> %res
599 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
600 <8 x double>, i8, i32)
602 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
604 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
605 <8 x double>zeroinitializer, i8 -1, i32 4)
606 ret <8 x double> %res
608 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
609 <8 x double>, i8, i32)
; --- vpabsd/vpabsq: masked and unmasked calls summed together ---
611 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
613 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512
616 ; CHECK: vpabsd{{.*}}{%k1}
617 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
618 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
619 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
620 %res2 = add <16 x i32> %res, %res1
624 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
626 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512
629 ; CHECK: vpabsq{{.*}}{%k1}
630 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
631 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
632 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
633 %res2 = add <8 x i64> %res, %res1
; --- vptestmq/vptestmd: bitwise test producing a mask, unmasked (-1) ---
637 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
638 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
639 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
642 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
644 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
645 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
646 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
649 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
; --- masked stores/loads: unaligned (storeu -> vmovups/vmovupd) vs. aligned
; (store/load -> vmovaps/vmovapd), with merge/zero masking variants ---
651 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
652 ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
653 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
657 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
659 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
660 ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
661 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
665 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
667 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
668 ; CHECK-LABEL: test_mask_store_aligned_ps:
670 ; CHECK-NEXT: kmovw %esi, %k1
671 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
673 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
677 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
679 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
680 ; CHECK-LABEL: test_mask_store_aligned_pd:
682 ; CHECK-NEXT: kmovw %esi, %k1
683 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
685 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
689 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
691 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
692 ; CHECK-LABEL: test_maskz_load_aligned_ps:
694 ; CHECK-NEXT: kmovw %esi, %k1
695 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
697 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
698 ret <16 x float> %res
701 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
703 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
704 ; CHECK-LABEL: test_maskz_load_aligned_pd:
706 ; CHECK-NEXT: kmovw %esi, %k1
707 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
709 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
710 ret <8 x double> %res
713 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
715 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
716 ; CHECK-LABEL: test_load_aligned_ps:
718 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
720 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
721 ret <16 x float> %res
724 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
725 ; CHECK-LABEL: test_load_aligned_pd:
727 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
729 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
730 ret <8 x double> %res
; --- valignq/valignd with immediate shift counts; unmasked, merge-masked,
; and zero-masked (exact EVEX encoding pinned for the maskz case) ---
; NOTE(review): the movntdqa declare below has no visible corresponding test here.
733 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
735 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
736 ; CHECK-LABEL: test_valign_q:
737 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
738 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
742 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
743 ; CHECK-LABEL: test_mask_valign_q:
744 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
745 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
749 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
751 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
752 ; CHECK-LABEL: test_maskz_valign_d:
753 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
754 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
758 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
; --- masked scalar store: vmovss to memory under %k1 ---
760 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
761 ; CHECK-LABEL: test_mask_store_ss
762 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
763 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
767 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
; --- integer compares into mask registers: vpcmpeqd/q and vpcmpgtd/q,
; each with an unmasked (-1) and a mask-under-%k1 variant ---
769 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
770 ; CHECK-LABEL: test_pcmpeq_d
771 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
772 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
776 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
777 ; CHECK-LABEL: test_mask_pcmpeq_d
778 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
779 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
783 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
785 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
786 ; CHECK-LABEL: test_pcmpeq_q
787 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
788 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
792 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
793 ; CHECK-LABEL: test_mask_pcmpeq_q
794 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
795 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
799 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
801 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
802 ; CHECK-LABEL: test_pcmpgt_d
803 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
804 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
808 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
809 ; CHECK-LABEL: test_mask_pcmpgt_d
810 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
811 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
815 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
817 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
818 ; CHECK-LABEL: test_pcmpgt_q
819 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
820 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
824 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
825 ; CHECK-LABEL: test_mask_pcmpgt_q
826 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
827 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
831 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
833 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
834 ; CHECK-LABEL: test_cmp_d_512
835 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
836 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
837 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
838 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
839 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
840 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
841 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
842 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
843 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
844 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
845 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
846 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
847 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
848 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
849 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
850 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
851 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
852 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
853 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
854 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
855 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
856 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
857 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
858 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
862 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
863 ; CHECK-LABEL: test_mask_cmp_d_512
864 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
865 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
866 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
867 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
868 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
869 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
870 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
871 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
872 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
873 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
874 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
875 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
876 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
877 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
878 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
879 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
880 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
881 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
882 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
883 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
884 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
885 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
886 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
887 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
891 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
893 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
894 ; CHECK-LABEL: test_ucmp_d_512
895 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
896 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
897 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
898 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
899 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
900 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
901 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
902 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
903 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
904 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
905 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
906 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
907 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
908 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
909 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
910 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
911 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
912 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
913 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
914 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
915 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
916 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
917 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
918 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
922 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
923 ; CHECK-LABEL: test_mask_ucmp_d_512
924 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
925 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
926 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
927 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
928 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
929 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
930 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
931 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
932 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
933 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
934 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
935 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
936 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
937 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
938 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
939 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
940 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
941 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
942 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
943 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
944 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
945 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
946 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
947 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
951 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
953 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
954 ; CHECK-LABEL: test_cmp_q_512
955 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
956 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
957 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
958 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
959 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
960 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
961 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
962 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
963 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
964 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
965 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
966 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
967 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
968 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
969 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
970 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
971 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
972 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
973 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
974 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
975 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
976 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
977 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
978 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
982 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
983 ; CHECK-LABEL: test_mask_cmp_q_512
984 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
985 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
986 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
987 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
988 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
989 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
990 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
991 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
992 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
993 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
994 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
995 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
996 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
997 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
998 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
999 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
1000 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1001 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1002 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
1003 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1004 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1005 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
1006 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1007 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1011 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
1013 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
1014 ; CHECK-LABEL: test_ucmp_q_512
1015 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
1016 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
1017 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1018 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
1019 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
1020 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1021 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
1022 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
1023 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1024 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
1025 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
1026 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1027 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
1028 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
1029 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1030 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
1031 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
1032 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1033 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
1034 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
1035 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1036 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
1037 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
1038 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1042 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1043 ; CHECK-LABEL: test_mask_ucmp_q_512
1044 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
1045 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
1046 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1047 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
1048 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
1049 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1050 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
1051 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
1052 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1053 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
1054 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
1055 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1056 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
1057 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
1058 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1059 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
1060 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1061 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1062 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
1063 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1064 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1065 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
1066 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1067 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1071 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
1073 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
1074 ; CHECK-LABEL: test_mask_vextractf32x4:
1075 ; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
1076 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
1077 ret <4 x float> %res
1080 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)
1082 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
1083 ; CHECK-LABEL: test_mask_vextracti64x4:
1084 ; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
1085 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask)
1089 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)
1091 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
1092 ; CHECK-LABEL: test_maskz_vextracti32x4:
1093 ; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
1094 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
1098 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)
1100 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
1101 ; CHECK-LABEL: test_vextractf64x4:
1102 ; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
1103 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1)
1104 ret <4 x double> %res
1107 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
1109 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
1110 ; CHECK-LABEL: test_x86_avx512_pslli_d
1112 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1116 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1117 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
1118 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
1119 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1123 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
1124 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
1125 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
1126 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1130 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1132 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1133 ; CHECK-LABEL: test_x86_avx512_pslli_q
1135 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1139 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1140 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
1141 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
1142 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1146 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1147 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
1148 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
1149 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1153 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1155 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1156 ; CHECK-LABEL: test_x86_avx512_psrli_d
1158 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1162 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1163 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
1164 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
1165 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1169 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1170 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1171 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1172 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1176 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1178 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1179 ; CHECK-LABEL: test_x86_avx512_psrli_q
1181 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1185 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1186 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1187 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1188 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1192 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1193 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1194 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1195 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1199 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1201 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1202 ; CHECK-LABEL: test_x86_avx512_psrai_d
1204 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1208 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1209 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1210 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1211 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1215 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1216 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1217 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1218 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1222 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1224 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1225 ; CHECK-LABEL: test_x86_avx512_psrai_q
1227 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1231 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1232 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1233 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1234 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1238 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1239 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1240 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1241 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1245 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1247 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1248 ; CHECK-LABEL: test_x86_avx512_psll_d
1250 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1254 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1255 ; CHECK-LABEL: test_x86_avx512_mask_psll_d
1256 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1257 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1261 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1262 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
1263 ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1264 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1268 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1270 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1271 ; CHECK-LABEL: test_x86_avx512_psll_q
1273 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1277 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1278 ; CHECK-LABEL: test_x86_avx512_mask_psll_q
1279 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1280 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1284 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1285 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
1286 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1287 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1291 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1293 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1294 ; CHECK-LABEL: test_x86_avx512_psrl_d
1296 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1300 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1301 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
1302 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1303 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1307 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1308 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
1309 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1310 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1314 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1316 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1317 ; CHECK-LABEL: test_x86_avx512_psrl_q
1319 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1323 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1324 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
1325 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1326 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1330 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1331 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
1332 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1333 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1337 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1339 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1340 ; CHECK-LABEL: test_x86_avx512_psra_d
1342 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1346 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1347 ; CHECK-LABEL: test_x86_avx512_mask_psra_d
1348 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1349 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
1353 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1354 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
1355 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1356 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1360 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
1362 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1363 ; CHECK-LABEL: test_x86_avx512_psra_q
1365 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1369 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1370 ; CHECK-LABEL: test_x86_avx512_mask_psra_q
1371 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1372 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
1376 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1377 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
1378 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1379 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1383 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
1385 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1386 ; CHECK-LABEL: test_x86_avx512_psllv_d
1388 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1392 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1393 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
1394 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1395 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1399 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1400 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
1401 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1402 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1406 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1408 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1409 ; CHECK-LABEL: test_x86_avx512_psllv_q
1411 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1415 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1416 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
1417 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
1418 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1422 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1423 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
1424 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1425 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1429 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1432 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
1433 ; CHECK-LABEL: test_x86_avx512_psrav_d
1435 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1439 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1440 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
1441 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
1442 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1446 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1447 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
1448 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
1449 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
; Lowers to vpsravd (per-element variable arithmetic right shift, 16 x i32):
; (a, shift counts, passthru, i16 lane mask).
1453 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1455 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
1456 ; CHECK-LABEL: test_x86_avx512_psrav_q
1458 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1462 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1463 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
1464 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
1465 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1469 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1470 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
1471 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
1472 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
; Lowers to vpsravq (per-element variable arithmetic right shift, 8 x i64):
; (a, shift counts, passthru, i8 lane mask).
1476 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1478 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
1479 ; CHECK-LABEL: test_x86_avx512_psrlv_d
1481 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
1485 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1486 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
1487 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
1488 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
1492 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1493 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
1494 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1495 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
; Lowers to vpsrlvd (per-element variable logical right shift, 16 x i32):
; (a, shift counts, passthru, i16 lane mask).
1499 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
1501 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
1502 ; CHECK-LABEL: test_x86_avx512_psrlv_q
1504 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
1508 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1509 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
1510 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
1511 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
1515 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1516 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
1517 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1518 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
; Lowers to vpsrlvq (per-element variable logical right shift, 8 x i64):
; (a, shift counts, passthru, i8 lane mask). Also exercised with a memory
; operand in test_x86_avx512_psrlv_q_memop above.
1522 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1524 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
1525 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
1527 %b = load <8 x i64>, <8 x i64>* %ptr
1528 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
; Masked FP sub/mul with static rounding control. Operands: (a, b, passthru,
; lane mask, i32 rounding mode). The rounding operand maps 0/1/2/3 to the
; {rn-sae}/{rd-sae}/{ru-sae}/{rz-sae} forms checked in the tests below.
1532 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1533 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1534 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
1536 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
1537 ; CHECK-LABEL: test_vsubps_rn
1538 ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
1539 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1540 <16 x float> zeroinitializer, i16 -1, i32 0)
1541 ret <16 x float> %res
1544 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
1545 ; CHECK-LABEL: test_vsubps_rd
1546 ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
1547 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1548 <16 x float> zeroinitializer, i16 -1, i32 1)
1549 ret <16 x float> %res
1552 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
1553 ; CHECK-LABEL: test_vsubps_ru
1554 ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
1555 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1556 <16 x float> zeroinitializer, i16 -1, i32 2)
1557 ret <16 x float> %res
1560 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
1561 ; CHECK-LABEL: test_vsubps_rz
1562 ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
1563 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1564 <16 x float> zeroinitializer, i16 -1, i32 3)
1565 ret <16 x float> %res
1568 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
1569 ; CHECK-LABEL: test_vmulps_rn
1570 ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
1571 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1572 <16 x float> zeroinitializer, i16 -1, i32 0)
1573 ret <16 x float> %res
1576 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
1577 ; CHECK-LABEL: test_vmulps_rd
1578 ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
1579 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1580 <16 x float> zeroinitializer, i16 -1, i32 1)
1581 ret <16 x float> %res
1584 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
1585 ; CHECK-LABEL: test_vmulps_ru
1586 ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
1587 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1588 <16 x float> zeroinitializer, i16 -1, i32 2)
1589 ret <16 x float> %res
1592 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
1593 ; CHECK-LABEL: test_vmulps_rz
1594 ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
1595 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1596 <16 x float> zeroinitializer, i16 -1, i32 3)
1597 ret <16 x float> %res
1601 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1602 ; CHECK-LABEL: test_vmulps_mask_rn
1603 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
1604 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1605 <16 x float> zeroinitializer, i16 %mask, i32 0)
1606 ret <16 x float> %res
1609 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1610 ; CHECK-LABEL: test_vmulps_mask_rd
1611 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
1612 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1613 <16 x float> zeroinitializer, i16 %mask, i32 1)
1614 ret <16 x float> %res
1617 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1618 ; CHECK-LABEL: test_vmulps_mask_ru
1619 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
1620 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1621 <16 x float> zeroinitializer, i16 %mask, i32 2)
1622 ret <16 x float> %res
1625 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1626 ; CHECK-LABEL: test_vmulps_mask_rz
1627 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
1628 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1629 <16 x float> zeroinitializer, i16 %mask, i32 3)
1630 ret <16 x float> %res
1633 ;; With Passthru value
1634 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1635 ; CHECK-LABEL: test_vmulps_mask_passthru_rn
1636 ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
1637 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1638 <16 x float> %passthru, i16 %mask, i32 0)
1639 ret <16 x float> %res
1642 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1643 ; CHECK-LABEL: test_vmulps_mask_passthru_rd
1644 ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
1645 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1646 <16 x float> %passthru, i16 %mask, i32 1)
1647 ret <16 x float> %res
1650 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1651 ; CHECK-LABEL: test_vmulps_mask_passthru_ru
1652 ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
1653 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1654 <16 x float> %passthru, i16 %mask, i32 2)
1655 ret <16 x float> %res
1658 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1659 ; CHECK-LABEL: test_vmulps_mask_passthru_rz
1660 ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
1661 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1662 <16 x float> %passthru, i16 %mask, i32 3)
1663 ret <16 x float> %res
1667 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1668 ; CHECK-LABEL: test_vmulpd_mask_rn
1669 ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
1670 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1671 <8 x double> zeroinitializer, i8 %mask, i32 0)
1672 ret <8 x double> %res
1675 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1676 ; CHECK-LABEL: test_vmulpd_mask_rd
1677 ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
1678 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1679 <8 x double> zeroinitializer, i8 %mask, i32 1)
1680 ret <8 x double> %res
1683 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1684 ; CHECK-LABEL: test_vmulpd_mask_ru
1685 ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
1686 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1687 <8 x double> zeroinitializer, i8 %mask, i32 2)
1688 ret <8 x double> %res
1691 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1692 ; CHECK-LABEL: test_vmulpd_mask_rz
1693 ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
1694 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1695 <8 x double> zeroinitializer, i8 %mask, i32 3)
1696 ret <8 x double> %res
1699 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
1700 ;CHECK-LABEL: test_xor_epi32
1701 ;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
1702 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1703 ret < 16 x i32> %res
1706 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1707 ;CHECK-LABEL: test_mask_xor_epi32
1708 ;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
1709 %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1710 ret < 16 x i32> %res
; Lowers to vpxord (bitwise XOR, dword elements): (a, b, passthru, i16 lane mask).
1713 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1715 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
1716 ;CHECK-LABEL: test_or_epi32
1717 ;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
1718 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1719 ret < 16 x i32> %res
1722 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1723 ;CHECK-LABEL: test_mask_or_epi32
1724 ;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
1725 %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1726 ret < 16 x i32> %res
; Lowers to vpord (bitwise OR, dword elements): (a, b, passthru, i16 lane mask).
1729 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1731 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
1732 ;CHECK-LABEL: test_and_epi32
1733 ;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
1734 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
1735 ret < 16 x i32> %res
1738 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1739 ;CHECK-LABEL: test_mask_and_epi32
1740 ;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
1741 %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1742 ret < 16 x i32> %res
; Lowers to vpandd (bitwise AND, dword elements): (a, b, passthru, i16 lane mask).
1745 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1747 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
1748 ;CHECK-LABEL: test_xor_epi64
1749 ;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
1750 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1754 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1755 ;CHECK-LABEL: test_mask_xor_epi64
1756 ;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
1757 %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
; Lowers to vpxorq (bitwise XOR, qword elements): (a, b, passthru, i8 lane mask).
1761 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1763 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
1764 ;CHECK-LABEL: test_or_epi64
1765 ;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
1766 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1770 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1771 ;CHECK-LABEL: test_mask_or_epi64
1772 ;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
1773 %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
; Lowers to vporq (bitwise OR, qword elements): (a, b, passthru, i8 lane mask).
1777 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1779 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
1780 ;CHECK-LABEL: test_and_epi64
1781 ;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
1782 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
1786 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1787 ;CHECK-LABEL: test_mask_and_epi64
1788 ;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
1789 %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
; Lowers to vpandq (bitwise AND, qword elements): (a, b, passthru, i8 lane mask).
1793 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
1796 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1797 ;CHECK-LABEL: test_mask_add_epi32_rr
1798 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
1799 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1800 ret < 16 x i32> %res
1803 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1804 ;CHECK-LABEL: test_mask_add_epi32_rrk
1805 ;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
1806 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1807 ret < 16 x i32> %res
1810 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1811 ;CHECK-LABEL: test_mask_add_epi32_rrkz
1812 ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
1813 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1814 ret < 16 x i32> %res
1817 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1818 ;CHECK-LABEL: test_mask_add_epi32_rm
1819 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
1820 %b = load <16 x i32>, <16 x i32>* %ptr_b
1821 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1822 ret < 16 x i32> %res
1825 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1826 ;CHECK-LABEL: test_mask_add_epi32_rmk
1827 ;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
1828 %b = load <16 x i32>, <16 x i32>* %ptr_b
1829 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1830 ret < 16 x i32> %res
1833 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1834 ;CHECK-LABEL: test_mask_add_epi32_rmkz
1835 ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
1836 %b = load <16 x i32>, <16 x i32>* %ptr_b
1837 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1838 ret < 16 x i32> %res
1841 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1842 ;CHECK-LABEL: test_mask_add_epi32_rmb
1843 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
1844 %q = load i32, i32* %ptr_b
1845 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1846 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1847 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1848 ret < 16 x i32> %res
1851 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1852 ;CHECK-LABEL: test_mask_add_epi32_rmbk
1853 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
1854 %q = load i32, i32* %ptr_b
1855 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1856 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1857 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1858 ret < 16 x i32> %res
1861 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1862 ;CHECK-LABEL: test_mask_add_epi32_rmbkz
1863 ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
1864 %q = load i32, i32* %ptr_b
1865 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1866 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1867 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1868 ret < 16 x i32> %res
; Lowers to vpaddd (dword add): (a, b, passthru, i16 lane mask). Exercised above
; in reg/reg, reg/mem, and {1to16} embedded-broadcast forms, each with k/kz variants.
1871 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1873 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1874 ;CHECK-LABEL: test_mask_sub_epi32_rr
1875 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
1876 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1877 ret < 16 x i32> %res
1880 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
1881 ;CHECK-LABEL: test_mask_sub_epi32_rrk
1882 ;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
1883 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1884 ret < 16 x i32> %res
1887 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
1888 ;CHECK-LABEL: test_mask_sub_epi32_rrkz
1889 ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
1890 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1891 ret < 16 x i32> %res
1894 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1895 ;CHECK-LABEL: test_mask_sub_epi32_rm
1896 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
1897 %b = load <16 x i32>, <16 x i32>* %ptr_b
1898 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1899 ret < 16 x i32> %res
1902 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1903 ;CHECK-LABEL: test_mask_sub_epi32_rmk
1904 ;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
1905 %b = load <16 x i32>, <16 x i32>* %ptr_b
1906 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1907 ret < 16 x i32> %res
1910 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
1911 ;CHECK-LABEL: test_mask_sub_epi32_rmkz
1912 ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
1913 %b = load <16 x i32>, <16 x i32>* %ptr_b
1914 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1915 ret < 16 x i32> %res
1918 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
1919 ;CHECK-LABEL: test_mask_sub_epi32_rmb
1920 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
1921 %q = load i32, i32* %ptr_b
1922 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1923 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1924 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
1925 ret < 16 x i32> %res
1928 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
1929 ;CHECK-LABEL: test_mask_sub_epi32_rmbk
1930 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
1931 %q = load i32, i32* %ptr_b
1932 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1933 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1934 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
1935 ret < 16 x i32> %res
1938 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
1939 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz
1940 ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
1941 %q = load i32, i32* %ptr_b
1942 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1943 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1944 %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
1945 ret < 16 x i32> %res
; Lowers to vpsubd (dword subtract): (a, b, passthru, i16 lane mask). Exercised
; above in reg/reg, reg/mem, and {1to16} broadcast forms, each with k/kz variants.
1948 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
1950 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
1951 ;CHECK-LABEL: test_mask_add_epi64_rr
1952 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
1953 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1957 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
1958 ;CHECK-LABEL: test_mask_add_epi64_rrk
1959 ;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
1960 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1964 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
1965 ;CHECK-LABEL: test_mask_add_epi64_rrkz
1966 ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
1967 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1971 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
1972 ;CHECK-LABEL: test_mask_add_epi64_rm
1973 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
1974 %b = load <8 x i64>, <8 x i64>* %ptr_b
1975 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
1979 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1980 ;CHECK-LABEL: test_mask_add_epi64_rmk
1981 ;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
1982 %b = load <8 x i64>, <8 x i64>* %ptr_b
1983 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
1987 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
1988 ;CHECK-LABEL: test_mask_add_epi64_rmkz
1989 ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
1990 %b = load <8 x i64>, <8 x i64>* %ptr_b
1991 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
1995 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
1996 ;CHECK-LABEL: test_mask_add_epi64_rmb
1997 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
1998 %q = load i64, i64* %ptr_b
1999 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2000 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2001 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2005 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
2006 ;CHECK-LABEL: test_mask_add_epi64_rmbk
2007 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
2008 %q = load i64, i64* %ptr_b
2009 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2010 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2011 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
2015 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
2016 ;CHECK-LABEL: test_mask_add_epi64_rmbkz
2017 ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
2018 %q = load i64, i64* %ptr_b
2019 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
2020 %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
2021 %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
; Lowers to vpaddq (qword add): (a, b, passthru, i8 lane mask). Exercised above
; in reg/reg, reg/mem, and {1to8} broadcast forms, each with k/kz variants.
2025 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2027 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
2028 ;CHECK-LABEL: test_mask_sub_epi64_rr
2029 ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
2030 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
2034 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
2035 ;CHECK-LABEL: test_mask_sub_epi64_rrk
2036 ;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
2037 %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
; ---------------------------------------------------------------------------
; Masked vpsubq: llvm.x86.avx512.mask.psub.q.512 lowering + MC encodings.
; Variants: reg/reg, reg/mem, and {1to8} embedded broadcast, each with
; merge-masking (k), zero-masking (kz), and unmasked (-1) forms.
; NOTE(review): restored `ret`/`}` lines dropped during extraction.
; ---------------------------------------------------------------------------
define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi64_rrkz
  ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
  ;CHECK-LABEL: test_mask_sub_epi64_rm
  ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi64_rmk
  ;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi64_rmkz
  ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
  %b = load <8 x i64>, <8 x i64>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
  ;CHECK-LABEL: test_mask_sub_epi64_rmb
  ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi64_rmbk
  ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_sub_epi64_rmbkz
  ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; ---------------------------------------------------------------------------
; Masked vpmuldq: llvm.x86.avx512.mask.pmul.dq.512 lowering + MC encodings.
; Broadcast variants bitcast a splatted <8 x i64> to <16 x i32> for the call.
; NOTE(review): restored `ret`/`}` lines dropped during extraction.
; ---------------------------------------------------------------------------
define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
  ;CHECK-LABEL: test_mask_mul_epi32_rr
  ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epi32_rrk
  ;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epi32_rrkz
  ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
  ;CHECK-LABEL: test_mask_mul_epi32_rm
  ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epi32_rmk
  ;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epi32_rmkz
  ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
  ;CHECK-LABEL: test_mask_mul_epi32_rmb
  ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epi32_rmbk
  ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epi32_rmbkz
  ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; ---------------------------------------------------------------------------
; Masked vpmuludq: llvm.x86.avx512.mask.pmulu.dq.512 lowering + MC encodings.
; Same variant matrix as the signed vpmuldq tests above.
; NOTE(review): restored `ret`/`}` lines dropped during extraction.
; ---------------------------------------------------------------------------
define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
  ;CHECK-LABEL: test_mask_mul_epu32_rr
  ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rrk
  ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rrkz
  ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
  ;CHECK-LABEL: test_mask_mul_epu32_rm
  ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rmk
  ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rmkz
  ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
  ;CHECK-LABEL: test_mask_mul_epu32_rmb
  ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rmbk
  ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
  ;CHECK-LABEL: test_mask_mul_epu32_rmbkz
  ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; ---------------------------------------------------------------------------
; Masked vpmulld: llvm.x86.avx512.mask.pmull.d.512 lowering + MC encodings.
; Uses an i16 mask (16 x i32 lanes) and {1to16} broadcast from an i32 scalar.
; NOTE(review): restored missing `ret`/`}` lines; the pre-existing
; `ret < 16 x i32>` spacing from the original test is kept as-is.
; ---------------------------------------------------------------------------
define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
  ;CHECK-LABEL: test_mask_mullo_epi32_rr_512
  ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
  ;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
  ;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
  ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
  ;CHECK-LABEL: test_mask_mullo_epi32_rm_512
  ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
  ;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
  ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
  ;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
  ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
  ;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
  ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
  ret < 16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
  ;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
  ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
  %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
  ret < 16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; ---------------------------------------------------------------------------
; vaddps with static rounding: llvm.x86.avx512.mask.add.ps.512.
; Rounding-control operand: 0={rn-sae} 1={rd-sae} 2={ru-sae} 3={rz-sae}
; 4=current (no embedded rounding). Tested in maskz, merge-mask and
; unmasked (i16 -1) forms. CHECK lines preserved verbatim.
; NOTE(review): restored `}` terminators dropped during extraction.
; ---------------------------------------------------------------------------
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
  ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
  ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
  ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
  ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
  ;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
  ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
  ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
  ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
  ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_add_round_ps_current
  ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
  ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
  ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
  ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
  ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_add_round_ps_current
  ;CHECK: vaddps %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---------------------------------------------------------------------------
; vsubps with static rounding: llvm.x86.avx512.mask.sub.ps.512.
; Same rounding-control convention as the vaddps tests (0-3 embedded, 4
; = current). The intrinsic declare lives outside this span.
; NOTE(review): restored `}` terminators dropped during extraction.
; ---------------------------------------------------------------------------
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
  ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
  ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
  ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
  ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
  ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
  ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
  ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
  ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
  ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_sub_round_ps_current
  ;CHECK: vsubps %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
; ---------------------------------------------------------------------------
; vdivps with static rounding: llvm.x86.avx512.mask.div.ps.512.
; maskz, merge-mask and unmasked forms for rounding controls 0-4.
; NOTE(review): restored `}` terminators dropped during extraction.
; ---------------------------------------------------------------------------
define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
  ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
  ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
  ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
  ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
  ;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
  ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
  ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
  ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
  ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_div_round_ps_current
  ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
  ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
  ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
  ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
  ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_div_round_ps_current
  ;CHECK: vdivps %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---------------------------------------------------------------------------
; vminps: llvm.x86.avx512.mask.min.ps.512. min/max take only {sae}
; (rounding operand 8) or current (4) — no embedded rounding modes.
; NOTE(review): restored `}` terminators dropped during extraction.
; ---------------------------------------------------------------------------
define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
  ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
  ;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
  ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
  ;CHECK-LABEL: test_mm512_mask_min_round_ps_current
  ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_min_round_ps_sae
  ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ;CHECK-LABEL: test_mm512_min_round_ps_current
  ;CHECK: vminps %zmm1, %zmm0, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; Tests for the masked vmaxps intrinsic (llvm.x86.avx512.mask.max.ps.512),
; mirroring the vminps tests: rounding operand 8 => {sae}, 4 => current
; rounding (no suffix); mask -1 => unmasked, zeroinitializer+mask => {z}.
2657 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2658 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
2659 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2660 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2661 ret <16 x float> %res
2664 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2665 ;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
2666 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
2667 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2668 ret <16 x float> %res
2671 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2672 ;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
2673 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2674 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2675 ret <16 x float> %res
2678 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2679 ;CHECK-LABEL: test_mm512_mask_max_round_ps_current
2680 ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
2681 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2682 ret <16 x float> %res
2685 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2686 ;CHECK-LABEL: test_mm512_max_round_ps_sae
2687 ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
2688 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2689 ret <16 x float> %res
2692 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2693 ;CHECK-LABEL: test_mm512_max_round_ps_current
2694 ;CHECK: vmaxps %zmm1, %zmm0, %zmm0
2695 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2696 ret <16 x float> %res
2698 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; Scalar vaddss round tests (llvm.x86.avx512.mask.add.ss.round).
; Rounding operand mapping shown by the CHECK lines:
;   0 => {rn-sae}, 1 => {rd-sae}, 2 => {ru-sae}, 3 => {rz-sae},
;   4 => current rounding (no suffix).
2700 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2702 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2703 ; CHECK-LABEL: test_mask_add_ss_rn
2704 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2705 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
2706 ret <4 x float> %res
2709 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2710 ; CHECK-LABEL: test_mask_add_ss_rd
2711 ; CHECK: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2712 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
2713 ret <4 x float> %res
2716 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2717 ; CHECK-LABEL: test_mask_add_ss_ru
2718 ; CHECK: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2719 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
2720 ret <4 x float> %res
2723 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2724 ; CHECK-LABEL: test_mask_add_ss_rz
2725 ; CHECK: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2726 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
2727 ret <4 x float> %res
2730 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2731 ; CHECK-LABEL: test_mask_add_ss_current
2732 ; CHECK: vaddss %xmm1, %xmm0, %xmm2 {%k1}
2733 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2734 ret <4 x float> %res
2737 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2738 ; CHECK-LABEL: test_maskz_add_ss_rn
2739 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2740 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
2741 ret <4 x float> %res
2744 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
2745 ; CHECK-LABEL: test_add_ss_rn
2746 ; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
2747 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
2748 ret <4 x float> %res
; Scalar vaddsd round tests (llvm.x86.avx512.mask.add.sd.round) — the
; double-precision counterpart of the vaddss tests above, covering all
; five rounding encodings plus zero-masked and unmasked forms.
2751 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2753 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2754 ; CHECK-LABEL: test_mask_add_sd_rn
2755 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2756 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
2757 ret <2 x double> %res
2760 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2761 ; CHECK-LABEL: test_mask_add_sd_rd
2762 ; CHECK: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2763 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
2764 ret <2 x double> %res
2767 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2768 ; CHECK-LABEL: test_mask_add_sd_ru
2769 ; CHECK: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2770 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
2771 ret <2 x double> %res
2774 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2775 ; CHECK-LABEL: test_mask_add_sd_rz
2776 ; CHECK: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2777 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
2778 ret <2 x double> %res
2781 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2782 ; CHECK-LABEL: test_mask_add_sd_current
2783 ; CHECK: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
2784 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2785 ret <2 x double> %res
2788 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2789 ; CHECK-LABEL: test_maskz_add_sd_rn
2790 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2791 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
2792 ret <2 x double> %res
2795 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
2796 ; CHECK-LABEL: test_add_sd_rn
2797 ; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
2798 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
2799 ret <2 x double> %res
; Scalar vmaxss tests (llvm.x86.avx512.mask.max.ss.round): {sae} (operand 8)
; and current-rounding (operand 4) forms, each in merge-masked, zero-masked,
; and unmasked (mask -1) variants.
2802 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2804 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2805 ; CHECK-LABEL: test_mask_max_ss_sae
2806 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2807 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
2808 ret <4 x float> %res
2811 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2812 ; CHECK-LABEL: test_maskz_max_ss_sae
2813 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2814 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
2815 ret <4 x float> %res
2818 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
2819 ; CHECK-LABEL: test_max_ss_sae
2820 ; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0
2821 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
2822 ret <4 x float> %res
2825 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2826 ; CHECK-LABEL: test_mask_max_ss
2827 ; CHECK: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
2828 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2829 ret <4 x float> %res
2832 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2833 ; CHECK-LABEL: test_maskz_max_ss
2834 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
2835 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
2836 ret <4 x float> %res
2839 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
2840 ; CHECK-LABEL: test_max_ss
2841 ; CHECK: vmaxss %xmm1, %xmm0, %xmm0
2842 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
2843 ret <4 x float> %res
; Scalar vmaxsd tests (llvm.x86.avx512.mask.max.sd.round) — double-precision
; counterpart of the vmaxss tests above: {sae} and current-rounding forms in
; merge-masked, zero-masked, and unmasked variants.
2845 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2847 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2848 ; CHECK-LABEL: test_mask_max_sd_sae
2849 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2850 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
2851 ret <2 x double> %res
2854 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2855 ; CHECK-LABEL: test_maskz_max_sd_sae
2856 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2857 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
2858 ret <2 x double> %res
2861 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
2862 ; CHECK-LABEL: test_max_sd_sae
2863 ; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
2864 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
2865 ret <2 x double> %res
2868 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2869 ; CHECK-LABEL: test_mask_max_sd
2870 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
2871 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2872 ret <2 x double> %res
2875 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2876 ; CHECK-LABEL: test_maskz_max_sd
2877 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
2878 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
2879 ret <2 x double> %res
2882 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
2883 ; CHECK-LABEL: test_max_sd
2884 ; CHECK: vmaxsd %xmm1, %xmm0, %xmm0
2885 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
2886 ret <2 x double> %res
; Signed int -> scalar FP conversion tests (vcvtsi2sd/vcvtsi2ss, 32- and
; 64-bit sources). All four use rounding operand 3, which the CHECK lines
; show maps to the embedded {rz-sae} (round-toward-zero) form.
2889 define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
2890 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
2892 ; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
2894 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1]
2895 ret <2 x double> %res
2897 declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
2899 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
2900 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
2902 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
2904 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
2905 ret <2 x double> %res
2907 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
2909 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
2910 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
2912 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
2914 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
2915 ret <4 x float> %res
2917 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
2919 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
2920 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
2922 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
2924 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
2925 ret <4 x float> %res
2927 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; Unsigned i32 -> float conversion tests (vcvtusi2ssl). Rounding operand 1
; emits {rd-sae}; operand 4 (current rounding) emits no suffix. The _mem
; variants load the source from memory; note that with an embedded rounding
; mode the operand is loaded to a register first, while the current-rounding
; form can fold the memory operand directly.
2929 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
2930 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
2932 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
2935 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
2936 ret <4 x float> %res
2939 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
2940 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
2942 ; CHECK-NEXT: movl (%rdi), %eax
2943 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
2946 %b = load i32, i32* %ptr
2947 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
2948 ret <4 x float> %res
2951 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
2952 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
2954 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
2957 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
2958 ret <4 x float> %res
2961 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
2962 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
2964 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
2967 %b = load i32, i32* %ptr
2968 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
2969 ret <4 x float> %res
2971 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
; Unsigned i64 -> float conversion tests (vcvtusi2ssq): explicit {rd-sae}
; rounding (operand 1) vs. current rounding (operand 4, no suffix).
2973 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
2974 ; CHECK-LABEL: _mm_cvt_roundu64_ss:
2976 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
2979 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1]
2980 ret <4 x float> %res
2983 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
2984 ; CHECK-LABEL: _mm_cvtu64_ss:
2986 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
2989 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1]
2990 ret <4 x float> %res
2992 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
; Unsigned int -> double conversion tests (vcvtusi2sdl/vcvtusi2sdq).
; The 32-bit intrinsic takes no rounding operand; the 64-bit one does
; (1 => {rd-sae}, 4 => current rounding, per the CHECK lines).
; NOTE(review): the names of the two 64-bit tests appear swapped relative
; to their rounding operands — @test_x86_avx512_mm_cvtu64_sd uses the
; explicit-rounding form (i32 1, {rd-sae}) while
; @test_x86_avx512__mm_cvt_roundu64_sd uses current rounding (i32 4).
; Confirm intent before renaming, since the CHECK-LABELs match the names.
2994 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
2995 ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
2997 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
3000 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1]
3001 ret <2 x double> %res
3003 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
3005 define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
3006 ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
3008 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
3011 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1]
3012 ret <2 x double> %res
3015 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
3016 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
3018 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
3021 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1]
3022 ret <2 x double> %res
3024 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
; Unmasked integer min/max encoding tests: each checks the exact EVEX
; encoding emitted for the 512-bit instruction with an all-ones mask.
3026 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
3027 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
3028 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
3029 <8 x i64>zeroinitializer, i8 -1)
3032 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3034 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
3035 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
3036 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
3037 <16 x i32>zeroinitializer, i16 -1)
3040 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3042 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
3043 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
3044 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
3045 <16 x i32>zeroinitializer, i16 -1)
3048 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; Masked signed-max tests (vpmaxsd/vpmaxsq): each calls the intrinsic once
; with a live mask and once unmasked (-1), then combines the results so
; both calls stay live through codegen.
3050 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512
3052 ; CHECK: vpmaxsd %zmm
3054 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3055 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3056 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3057 %res2 = add <16 x i32> %res, %res1
3058 ret <16 x i32> %res2
3061 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512
3063 ; CHECK: vpmaxsq %zmm
3065 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3066 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3067 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3068 %res2 = add <8 x i64> %res, %res1
; Masked unsigned-max tests (vpmaxud/vpmaxuq), same masked + unmasked
; call pattern as the signed variants above.
3072 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3074 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512
3076 ; CHECK: vpmaxud %zmm
3078 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3079 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3080 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3081 %res2 = add <16 x i32> %res, %res1
3082 ret <16 x i32> %res2
3085 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3087 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512
3089 ; CHECK: vpmaxuq %zmm
3091 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3092 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3093 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3094 %res2 = add <8 x i64> %res, %res1
; Masked signed-min tests (vpminsd/vpminsq), same masked + unmasked
; call pattern as the max variants above.
3098 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3100 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512
3102 ; CHECK: vpminsd %zmm
3104 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3105 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3106 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3107 %res2 = add <16 x i32> %res, %res1
3108 ret <16 x i32> %res2
3111 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3113 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512
3115 ; CHECK: vpminsq %zmm
3117 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3118 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3119 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3120 %res2 = add <8 x i64> %res, %res1
; Masked unsigned-min tests (vpminud/vpminuq), completing the masked
; min/max matrix (the pminu.d declare appears earlier in the file).
3124 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512
3126 ; CHECK: vpminud %zmm
3128 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3129 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3130 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3131 %res2 = add <16 x i32> %res, %res1
3132 ret <16 x i32> %res2
3135 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3137 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512
3139 ; CHECK: vpminuq %zmm
3141 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3142 %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3143 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3144 %res2 = add <8 x i64> %res, %res1
; Masked vpermi2var (two-source permute, index operand overwritten) tests
; for d/pd/ps/q element types. Each runs the intrinsic masked and unmasked
; and combines the two results (add for integer, fadd for FP lanes).
3148 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3150 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512
3153 ; CHECK: vpermi2d {{.*}}{%k1}
3154 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3155 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3156 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3157 %res2 = add <16 x i32> %res, %res1
3158 ret <16 x i32> %res2
3161 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
3163 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512
3166 ; CHECK: vpermi2pd {{.*}}{%k1}
3167 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
3168 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
3169 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
3170 %res2 = fadd <8 x double> %res, %res1
3171 ret <8 x double> %res2
3174 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
3176 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512
3179 ; CHECK: vpermi2ps {{.*}}{%k1}
3180 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
3181 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
3182 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
3183 %res2 = fadd <16 x float> %res, %res1
3184 ret <16 x float> %res2
3187 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3189 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512
3192 ; CHECK: vpermi2q {{.*}}{%k1}
3193 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3194 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3195 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3196 %res2 = add <8 x i64> %res, %res1
; Zero-masking vpermt2var (two-source permute, table operand overwritten)
; tests for d/pd/ps/q: the CHECK lines expect the {%k1} {z} zero-masked
; form for the masked call and the plain form for the unmasked (-1) call.
3200 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3202 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512
3205 ; CHECK: vpermt2d {{.*}}{%k1} {z}
3206 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3207 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3208 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3209 %res2 = add <16 x i32> %res, %res1
3210 ret <16 x i32> %res2
3213 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
3215 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512
3218 ; CHECK: vpermt2pd {{.*}}{%k1} {z}
3219 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3220 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3221 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3222 %res2 = fadd <8 x double> %res, %res1
3223 ret <8 x double> %res2
3226 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
3228 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512
3231 ; CHECK: vpermt2ps {{.*}}{%k1} {z}
3232 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3233 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3234 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3235 %res2 = fadd <16 x float> %res, %res1
3236 ret <16 x float> %res2
3240 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3242 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512
3245 ; CHECK: vpermt2q {{.*}}{%k1} {z}
3246 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3247 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3248 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3249 %res2 = add <8 x i64> %res, %res1
; Merge-masking vpermt2var.d test: same pattern as the maskz variants
; above, but expecting the {%k1} merge form (no {z}).
3253 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3255 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512
3258 ; CHECK: vpermt2d {{.*}}{%k1}
3260 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3261 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3262 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3263 %res2 = add <16 x i32> %res, %res1
3264 ret <16 x i32> %res2
; Masked vscalefpd/vscalefps tests. The masked call uses a non-default
; rounding operand (3 for pd, 2 for ps) and the unmasked call uses 0;
; the CHECK lines only pin the masked {%k1} instruction.
3267 declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
3268 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512
3271 ; CHECK: vscalefpd{{.*}}{%k1}
3272 define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3273 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
3274 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
3275 %res2 = fadd <8 x double> %res, %res1
3276 ret <8 x double> %res2
3279 declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
3280 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512
3283 ; CHECK: vscalefps{{.*}}{%k1}
3284 define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3285 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
3286 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
3287 %res2 = fadd <16 x float> %res, %res1
3288 ret <16 x float> %res2
; Masked vunpckhpd/vunpckhps tests: check the masked {%k1} instruction,
; the unmasked instruction, and the asm shuffle-decode comments emitted
; for each. (For pd, the i8 mask is zero-extended via movzbl before kmovw;
; the i16 ps mask moves directly with kmovw.)
3291 declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
3293 define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3294 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
3296 ; CHECK-NEXT: movzbl %dil, %eax
3297 ; CHECK-NEXT: kmovw %eax, %k1
3298 ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1}
3299 ; CHECK-NEXT: ## zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
3300 ; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm0
3301 ; CHECK-NEXT: ## zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
3302 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3303 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3304 %res2 = fadd <8 x double> %res, %res1
3305 ret <8 x double> %res2
3308 declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
3310 define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3311 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
3313 ; CHECK-NEXT: kmovw %edi, %k1
3314 ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1}
3315 ; CHECK-NEXT: ## zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
3316 ; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm0
3317 ; CHECK-NEXT: ## zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
3318 %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3319 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3320 %res2 = fadd <16 x float> %res, %res1
3321 ret <16 x float> %res2
; Masked VUNPCKLPD (512-bit): masked (%x3) vs. unmasked (-1) lowering of the
; unpckl.pd intrinsic; results combined with fadd.
3324 declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
3326 define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
3327 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
3329 ; CHECK-NEXT: movzbl %dil, %eax
3330 ; CHECK-NEXT: kmovw %eax, %k1
3331 ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1}
3332 ; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
3333 ; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm0
3334 ; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
3335 %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
3336 %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
3337 %res2 = fadd <8 x double> %res, %res1
3338 ret <8 x double> %res2
; Masked VUNPCKLPS (512-bit): masked (%x3) vs. unmasked (-1) lowering of the
; unpckl.ps intrinsic; results combined with fadd.
3341 declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
3343 define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
3344 ; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
3346 ; CHECK-NEXT: kmovw %edi, %k1
3347 ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1}
3348 ; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
3349 ; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm0
3350 ; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
3351 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
3352 %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
3353 %res2 = fadd <16 x float> %res, %res1
3354 ret <16 x float> %res2
; Masked VPUNPCKLQDQ (512-bit): exercises merge-masked (%x3 into %x2),
; unmasked (-1), and zero-masked (zeroinitializer passthru) forms, summing
; all three results with add.
; Fix: the zero-masking marker on the third expected instruction was
; truncated ("{z") — restored the closing brace so it reads "{z}", matching
; every other zero-masked assertion in this file and llc's actual output.
3357 declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3359 define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3360 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
3362 ; CHECK-NEXT: movzbl %dil, %eax
3363 ; CHECK-NEXT: kmovw %eax, %k1
3364 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1}
3365 ; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
3366 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 {%k1} {z}
3367 ; CHECK-NEXT: ## zmm3 = k1[0],zmm0[0],k1[2],zmm0[2],k1[4],zmm0[4],k1[6],zmm0[6]
3368 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm0
3369 ; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
3370 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
3371 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
3373 %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3374 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3375 %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3)
3376 %res3 = add <8 x i64> %res, %res1
3377 %res4 = add <8 x i64> %res2, %res3
; Masked VPUNPCKHQDQ (512-bit): masked (%x3) vs. unmasked (-1) forms,
; results combined with integer add.
3381 declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3383 define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3384 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
3386 ; CHECK-NEXT: movzbl %dil, %eax
3387 ; CHECK-NEXT: kmovw %eax, %k1
3388 ; CHECK-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1}
3389 ; CHECK-NEXT: ## zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
3390 ; CHECK-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm0
3391 ; CHECK-NEXT: ## zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
3392 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
3394 %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
3395 %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3396 %res2 = add <8 x i64> %res, %res1
; Masked VPUNPCKHDQ (512-bit, i32 lanes): masked (%x3) vs. unmasked (-1)
; forms, results combined with integer add.
3400 declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3402 define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3403 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
3405 ; CHECK-NEXT: kmovw %edi, %k1
3406 ; CHECK-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1}
3407 ; CHECK-NEXT: ## zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
3408 ; CHECK-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm0
3409 ; CHECK-NEXT: ## zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
3410 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
3412 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3413 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3414 %res2 = add <16 x i32> %res, %res1
3415 ret <16 x i32> %res2
; Masked VPUNPCKLDQ (512-bit, i32 lanes): masked (%x3) vs. unmasked (-1)
; forms, results combined with integer add.
3418 declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3420 define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3421 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
3423 ; CHECK-NEXT: kmovw %edi, %k1
3424 ; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1}
3425 ; CHECK-NEXT: ## zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
3426 ; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm0
3427 ; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
3428 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
3430 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3431 %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3432 %res2 = add <16 x i32> %res, %res1
3433 ret <16 x i32> %res2
; VPMOVQB (truncate i64->i8): unmasked, merge-masked, and zero-masked forms
; of the pmov.qb intrinsic; all three results summed.
3436 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
3438 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3439 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
3440 ; CHECK: vpmovqb %zmm0, %xmm1 {%k1}
3441 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
3442 ; CHECK-NEXT: vpmovqb %zmm0, %xmm0
3443 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3444 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3445 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3446 %res3 = add <16 x i8> %res0, %res1
3447 %res4 = add <16 x i8> %res3, %res2
; VPMOVQB store-to-memory form: unmasked (-1) then masked (%x2) truncating
; stores through %ptr.
3451 declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3453 define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3454 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
3455 ; CHECK: vpmovqb %zmm0, (%rdi)
3456 ; CHECK: vpmovqb %zmm0, (%rdi) {%k1}
3457 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3458 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; VPMOVSQB (signed-saturating truncate i64->i8): unmasked, merge-masked,
; and zero-masked forms; results summed.
3462 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
3464 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3465 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
3466 ; CHECK: vpmovsqb %zmm0, %xmm1 {%k1}
3467 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
3468 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
3469 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3470 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3471 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3472 %res3 = add <16 x i8> %res0, %res1
3473 %res4 = add <16 x i8> %res3, %res2
; VPMOVSQB store-to-memory form: unmasked then masked saturating stores.
3477 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3479 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3480 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
3481 ; CHECK: vpmovsqb %zmm0, (%rdi)
3482 ; CHECK: vpmovsqb %zmm0, (%rdi) {%k1}
3483 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3484 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; VPMOVUSQB (unsigned-saturating truncate i64->i8): unmasked, merge-masked,
; and zero-masked forms; results summed.
3488 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
3490 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
3491 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
3492 ; CHECK: vpmovusqb %zmm0, %xmm1 {%k1}
3493 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
3494 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
3495 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
3496 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
3497 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
3498 %res3 = add <16 x i8> %res0, %res1
3499 %res4 = add <16 x i8> %res3, %res2
; VPMOVUSQB store-to-memory form: unmasked then masked saturating stores.
3503 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)
3505 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3506 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
3507 ; CHECK: vpmovusqb %zmm0, (%rdi)
3508 ; CHECK: vpmovusqb %zmm0, (%rdi) {%k1}
3509 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3510 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; VPMOVQW (truncate i64->i16): unmasked, merge-masked, and zero-masked
; forms; results summed.
3514 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
3516 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3517 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
3518 ; CHECK: vpmovqw %zmm0, %xmm1 {%k1}
3519 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
3520 ; CHECK-NEXT: vpmovqw %zmm0, %xmm0
3521 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3522 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3523 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3524 %res3 = add <8 x i16> %res0, %res1
3525 %res4 = add <8 x i16> %res3, %res2
; VPMOVQW store-to-memory form: unmasked then masked truncating stores.
3529 declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3531 define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3532 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
3533 ; CHECK: vpmovqw %zmm0, (%rdi)
3534 ; CHECK: vpmovqw %zmm0, (%rdi) {%k1}
3535 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3536 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; VPMOVSQW (signed-saturating truncate i64->i16): unmasked, merge-masked,
; and zero-masked forms; results summed.
3540 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
3542 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3543 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
3544 ; CHECK: vpmovsqw %zmm0, %xmm1 {%k1}
3545 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
3546 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
3547 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3548 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3549 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3550 %res3 = add <8 x i16> %res0, %res1
3551 %res4 = add <8 x i16> %res3, %res2
; VPMOVSQW store-to-memory form: unmasked then masked saturating stores.
3555 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3557 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3558 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
3559 ; CHECK: vpmovsqw %zmm0, (%rdi)
3560 ; CHECK: vpmovsqw %zmm0, (%rdi) {%k1}
3561 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3562 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; VPMOVUSQW (unsigned-saturating truncate i64->i16): unmasked, merge-masked,
; and zero-masked forms; results summed.
3566 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
3568 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3569 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
3570 ; CHECK: vpmovusqw %zmm0, %xmm1 {%k1}
3571 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
3572 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
3573 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3574 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3575 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3576 %res3 = add <8 x i16> %res0, %res1
3577 %res4 = add <8 x i16> %res3, %res2
; VPMOVUSQW store-to-memory form: unmasked then masked saturating stores.
3581 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3583 define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3584 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
3585 ; CHECK: vpmovusqw %zmm0, (%rdi)
3586 ; CHECK: vpmovusqw %zmm0, (%rdi) {%k1}
3587 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3588 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; VPMOVQD (truncate i64->i32, ymm destination): unmasked, merge-masked,
; and zero-masked forms; results summed.
3592 declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
3594 define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3595 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
3596 ; CHECK: vpmovqd %zmm0, %ymm1 {%k1}
3597 ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
3598 ; CHECK-NEXT: vpmovqd %zmm0, %ymm0
3599 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3600 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3601 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3602 %res3 = add <8 x i32> %res0, %res1
3603 %res4 = add <8 x i32> %res3, %res2
; VPMOVQD store-to-memory form: unmasked then masked truncating stores.
3607 declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3609 define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3610 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
3611 ; CHECK: vpmovqd %zmm0, (%rdi)
3612 ; CHECK: vpmovqd %zmm0, (%rdi) {%k1}
3613 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3614 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; VPMOVSQD (signed-saturating truncate i64->i32): unmasked, merge-masked,
; and zero-masked forms; results summed.
3618 declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
3620 define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3621 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
3622 ; CHECK: vpmovsqd %zmm0, %ymm1 {%k1}
3623 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
3624 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
3625 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3626 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3627 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3628 %res3 = add <8 x i32> %res0, %res1
3629 %res4 = add <8 x i32> %res3, %res2
; VPMOVSQD store-to-memory form: unmasked then masked saturating stores.
3633 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3635 define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3636 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
3637 ; CHECK: vpmovsqd %zmm0, (%rdi)
3638 ; CHECK: vpmovsqd %zmm0, (%rdi) {%k1}
3639 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3640 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; VPMOVUSQD (unsigned-saturating truncate i64->i32): unmasked, merge-masked,
; and zero-masked forms; results summed.
3644 declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
3646 define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3647 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
3648 ; CHECK: vpmovusqd %zmm0, %ymm1 {%k1}
3649 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
3650 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
3651 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3652 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3653 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3654 %res3 = add <8 x i32> %res0, %res1
3655 %res4 = add <8 x i32> %res3, %res2
; VPMOVUSQD store-to-memory form: unmasked then masked saturating stores.
3659 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3661 define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3662 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
3663 ; CHECK: vpmovusqd %zmm0, (%rdi)
3664 ; CHECK: vpmovusqd %zmm0, (%rdi) {%k1}
3665 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3666 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; VPMOVDB (truncate i32->i8): unmasked, merge-masked, and zero-masked
; forms with a 16-bit mask; results summed.
3670 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
3672 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3673 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
3674 ; CHECK: vpmovdb %zmm0, %xmm1 {%k1}
3675 ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
3676 ; CHECK-NEXT: vpmovdb %zmm0, %xmm0
3677 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3678 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3679 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3680 %res3 = add <16 x i8> %res0, %res1
3681 %res4 = add <16 x i8> %res3, %res2
; VPMOVDB store-to-memory form: unmasked then masked truncating stores.
3685 declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)
3687 define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3688 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
3689 ; CHECK: vpmovdb %zmm0, (%rdi)
3690 ; CHECK: vpmovdb %zmm0, (%rdi) {%k1}
3691 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3692 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; VPMOVSDB (signed-saturating truncate i32->i8): unmasked, merge-masked,
; and zero-masked forms; results summed.
3696 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
3698 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3699 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
3700 ; CHECK: vpmovsdb %zmm0, %xmm1 {%k1}
3701 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
3702 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
3703 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3704 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3705 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3706 %res3 = add <16 x i8> %res0, %res1
3707 %res4 = add <16 x i8> %res3, %res2
; VPMOVSDB store-to-memory form: unmasked then masked saturating stores.
3711 declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)
3713 define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3714 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
3715 ; CHECK: vpmovsdb %zmm0, (%rdi)
3716 ; CHECK: vpmovsdb %zmm0, (%rdi) {%k1}
3717 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3718 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; VPMOVUSDB (unsigned-saturating truncate i32->i8): unmasked, merge-masked,
; and zero-masked forms; results summed.
3722 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
3724 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3725 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
3726 ; CHECK: vpmovusdb %zmm0, %xmm1 {%k1}
3727 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
3728 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
3729 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3730 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3731 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3732 %res3 = add <16 x i8> %res0, %res1
3733 %res4 = add <16 x i8> %res3, %res2
; VPMOVUSDB store-to-memory form: unmasked then masked saturating stores.
3737 declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)
3739 define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3740 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
3741 ; CHECK: vpmovusdb %zmm0, (%rdi)
3742 ; CHECK: vpmovusdb %zmm0, (%rdi) {%k1}
3743 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3744 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; VPMOVDW (truncate i32->i16, ymm destination): unmasked, merge-masked,
; and zero-masked forms; results summed.
3748 declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
3750 define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3751 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
3752 ; CHECK: vpmovdw %zmm0, %ymm1 {%k1}
3753 ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
3754 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0
3755 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3756 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3757 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3758 %res3 = add <16 x i16> %res0, %res1
3759 %res4 = add <16 x i16> %res3, %res2
3760 ret <16 x i16> %res4
; VPMOVDW store-to-memory form: unmasked then masked truncating stores.
3763 declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3765 define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3766 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
3767 ; CHECK: vpmovdw %zmm0, (%rdi)
3768 ; CHECK: vpmovdw %zmm0, (%rdi) {%k1}
3769 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3770 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; VPMOVSDW (signed-saturating truncate i32->i16): unmasked, merge-masked,
; and zero-masked forms; results summed.
3774 declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
3776 define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3777 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
3778 ; CHECK: vpmovsdw %zmm0, %ymm1 {%k1}
3779 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
3780 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
3781 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3782 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3783 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3784 %res3 = add <16 x i16> %res0, %res1
3785 %res4 = add <16 x i16> %res3, %res2
3786 ret <16 x i16> %res4
; VPMOVSDW store-to-memory form: unmasked then masked saturating stores.
3789 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3791 define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3792 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
3793 ; CHECK: vpmovsdw %zmm0, (%rdi)
3794 ; CHECK: vpmovsdw %zmm0, (%rdi) {%k1}
3795 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3796 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; VPMOVUSDW (unsigned-saturating truncate i32->i16): unmasked, merge-masked,
; and zero-masked forms; results summed.
3800 declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
3802 define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3803 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
3804 ; CHECK: vpmovusdw %zmm0, %ymm1 {%k1}
3805 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
3806 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
3807 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3808 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3809 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3810 %res3 = add <16 x i16> %res0, %res1
3811 %res4 = add <16 x i16> %res3, %res2
3812 ret <16 x i16> %res4
; VPMOVUSDW store-to-memory form: unmasked then masked saturating stores.
3815 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3817 define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3818 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
3819 ; CHECK: vpmovusdw %zmm0, (%rdi)
3820 ; CHECK: vpmovusdw %zmm0, (%rdi) {%k1}
3821 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3822 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
; VCVTDQ2PD (i32 -> f64 widening convert): masked (%x2) vs. unmasked (-1);
; no rounding-mode operand for this intrinsic. Results combined with fadd.
3826 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
3828 define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3829 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
3831 ; CHECK-NEXT: movzbl %dil, %eax
3832 ; CHECK-NEXT: kmovw %eax, %k1
3833 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
3834 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
3835 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3837 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3838 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3839 %res2 = fadd <8 x double> %res, %res1
3840 ret <8 x double> %res2
; VCVTDQ2PS with rounding operand: masked call uses current direction
; (imm 4, no embedded rounding printed), unmasked uses round-to-nearest
; with suppress-all-exceptions (imm 0 -> rn-sae).
3843 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3845 define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3846 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
3848 ; CHECK-NEXT: kmovw %edi, %k1
3849 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1}
3850 ; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0
3851 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3853 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3854 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3855 %res2 = fadd <16 x float> %res, %res1
3856 ret <16 x float> %res2
; VCVTPD2DQ: masked call with current rounding (imm 4), unmasked with
; rn-sae (imm 0); integer results combined with add.
3859 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3861 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3862 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512:
3864 ; CHECK-NEXT: movzbl %dil, %eax
3865 ; CHECK-NEXT: kmovw %eax, %k1
3866 ; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1}
3867 ; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0
3868 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3870 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3871 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3872 %res2 = add <8 x i32> %res, %res1
; VCVTPD2PS: masked call with current rounding (imm 4), unmasked with
; round-toward-positive + sae (imm 2 -> ru-sae); results fadd'ed.
3876 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
3878 define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) {
3879 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512:
3881 ; CHECK-NEXT: movzbl %dil, %eax
3882 ; CHECK-NEXT: kmovw %eax, %k1
3883 ; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1}
3884 ; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0
3885 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
3887 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
3888 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
3889 %res2 = fadd <8 x float> %res, %res1
3890 ret <8 x float> %res2
; VCVTPD2UDQ: masked call with ru-sae (imm 2), unmasked with rn-sae
; (imm 0); integer results combined with add.
3893 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
3895 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3896 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512:
3898 ; CHECK-NEXT: movzbl %dil, %eax
3899 ; CHECK-NEXT: kmovw %eax, %k1
3900 ; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1}
3901 ; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0
3902 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3904 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
3905 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3906 %res2 = add <8 x i32> %res, %res1
; VCVTPS2DQ: masked call with ru-sae (imm 2), unmasked with rn-sae
; (imm 0); integer results combined with add.
3910 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)
3912 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3913 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512:
3915 ; CHECK-NEXT: kmovw %edi, %k1
3916 ; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1}
3917 ; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0
3918 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3920 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3921 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3922 %res2 = add <16 x i32> %res, %res1
3923 ret <16 x i32> %res2
; VCVTPS2PD (widening, exact): masked call with current rounding (imm 4),
; unmasked with suppress-all-exceptions only (imm 8 -> sae); results fadd'ed.
3926 declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)
3928 define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) {
3929 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512:
3931 ; CHECK-NEXT: movzbl %dil, %eax
3932 ; CHECK-NEXT: kmovw %eax, %k1
3933 ; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1}
3934 ; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0
3935 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3937 %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
3938 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
3939 %res2 = fadd <8 x double> %res, %res1
3940 ret <8 x double> %res2
; VCVTPS2UDQ: masked call with ru-sae (imm 2), unmasked with rn-sae
; (imm 0); integer results combined with add.
3943 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
3945 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3946 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512:
3948 ; CHECK-NEXT: kmovw %edi, %k1
3949 ; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1}
3950 ; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0
3951 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3953 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3954 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3955 %res2 = add <16 x i32> %res, %res1
3956 ret <16 x i32> %res2
; VCVTTPD2DQ (truncating convert): masked call with current rounding
; (imm 4), unmasked with sae (imm 8); integer results combined with add.
3959 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3961 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3962 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
3964 ; CHECK-NEXT: movzbl %dil, %eax
3965 ; CHECK-NEXT: kmovw %eax, %k1
3966 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
3967 ; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
3968 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3970 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3971 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
3972 %res2 = add <8 x i32> %res, %res1
; Masked vcvtudq2pd (no rounding operand on this intrinsic): masked and unmasked results summed.
3976 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
3978 define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
3979 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
3981 ; CHECK-NEXT: movzbl %dil, %eax
3982 ; CHECK-NEXT: kmovw %eax, %k1
3983 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
3984 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
3985 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3987 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
3988 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
3989 %res2 = fadd <8 x double> %res, %res1
3990 ret <8 x double> %res2
; Masked vcvtudq2ps: masked call with current rounding (i32 4), unmasked with {rn-sae} (i32 0).
3994 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3996 define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3997 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
3999 ; CHECK-NEXT: kmovw %edi, %k1
4000 ; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1}
4001 ; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0
4002 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4004 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
4005 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
4006 %res2 = fadd <16 x float> %res, %res1
4007 ret <16 x float> %res2
; Masked vcvttpd2udq: masked call with current rounding (i32 4), unmasked with {sae} (i32 8).
4010 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
4012 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
4013 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
4015 ; CHECK-NEXT: movzbl %dil, %eax
4016 ; CHECK-NEXT: kmovw %eax, %k1
4017 ; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
4018 ; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
4019 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
4021 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
4022 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
4023 %res2 = add <8 x i32> %res, %res1
; Fix: the function body had no terminator (invalid LLVM IR) — restore the return of the summed result.
4024 ret <8 x i32> %res2
; Masked vcvttps2dq: masked call with current rounding (i32 4), unmasked with {sae} (i32 8).
4027 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
4029 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
4030 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
4032 ; CHECK-NEXT: kmovw %edi, %k1
4033 ; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
4034 ; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
4035 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
4037 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
4038 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
4039 %res2 = add <16 x i32> %res, %res1
4040 ret <16 x i32> %res2
; Masked vcvttps2udq: masked call with current rounding (i32 4), unmasked with {sae} (i32 8).
4043 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
4045 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
4046 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
4048 ; CHECK-NEXT: kmovw %edi, %k1
4049 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
4050 ; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
4051 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
4053 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
4054 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
4055 %res2 = add <16 x i32> %res, %res1
4056 ret <16 x i32> %res2
; Masked scalar vscalefss: masked call with current rounding (i32 4) plus an unmasked
; rounding-mode call (i32 8); loose CHECK (not CHECK-NEXT) patterns verify both forms emit.
4060 declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
4061 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ss
4064 ; CHECK: vscalefss {{.*}}{%k1}
4065 ; CHECK: vscalefss {rn-sae}
4066 define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
4067 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
4068 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
4069 %res2 = fadd <4 x float> %res, %res1
4070 ret <4 x float> %res2
; Masked scalar vscalefsd: same pattern as the ss variant — masked current-rounding call
; plus an unmasked rounding-mode call, results summed.
4073 declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
4074 ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_sd
4077 ; CHECK: vscalefsd {{.*}}{%k1}
4078 ; CHECK: vscalefsd {rn-sae}
4079 define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
4080 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
4081 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
4082 %res2 = fadd <2 x double> %res, %res1
4083 ret <2 x double> %res2
; vgetexpss lowering: exercises merge-masking, zero-masking ({z}), unmasked, and the
; {sae} form (i32 8); the four results are summed so all variants must be emitted.
4086 declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
4088 define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
4089 ; CHECK-LABEL: test_getexp_ss:
4091 ; CHECK-NEXT: andl $1, %edi
4092 ; CHECK-NEXT: kmovw %edi, %k1
4093 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
4094 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
4095 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
4096 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
4097 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
4098 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
4099 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
4100 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4102 %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
4103 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
4104 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
4105 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
4107 %res.1 = fadd <4 x float> %res0, %res1
4108 %res.2 = fadd <4 x float> %res2, %res3
4109 %res = fadd <4 x float> %res.1, %res.2
4110 ret <4 x float> %res
; vgetexpsd lowering: merge-masked, zero-masked, unmasked, with both current rounding
; (i32 4) and {sae} (i32 8) variants; all four results summed.
4113 declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
4115 define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
4116 ; CHECK-LABEL: test_getexp_sd:
4118 ; CHECK-NEXT: andl $1, %edi
4119 ; CHECK-NEXT: kmovw %edi, %k1
4120 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
4121 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
4122 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
4123 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
4124 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
4125 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
4126 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
4127 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
4129 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
4130 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
4131 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
4132 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
4134 %res.1 = fadd <2 x double> %res0, %res1
4135 %res.2 = fadd <2 x double> %res2, %res3
4136 %res = fadd <2 x double> %res.1, %res.2
4137 ret <2 x double> %res
; Masked vcmpsd with {sae}: verifies the GPR mask is AND'd to one bit, moved into %k1,
; and the single compare bit is sign-extended to a full i8 (shl/sar by 7).
4140 declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
4142 define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
4143 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
4145 ; CHECK-NEXT: andl $1, %edi
4146 ; CHECK-NEXT: kmovw %edi, %k1
4147 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
4148 ; CHECK-NEXT: kmovw %k0, %eax
4149 ; CHECK-NEXT: shlb $7, %al
4150 ; CHECK-NEXT: sarb $7, %al
4153 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
; Fix: the function body had no terminator (invalid LLVM IR) — restore the return of the compare result.
4154 ret i8 %res4
; vcmpsd with several predicates (2=le via unord/le pair, 3=unord, 4=neq, 5=nlt), masked
; and unmasked, {sae} and current rounding; the i8 results are OR'd together.
4157 define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
4158 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
4160 ; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0
4161 ; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1
4162 ; CHECK-NEXT: korw %k0, %k1, %k0
4163 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1
4164 ; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2
4165 ; CHECK-NEXT: korw %k1, %k2, %k1
4166 ; CHECK-NEXT: andl $1, %edi
4167 ; CHECK-NEXT: kmovw %edi, %k2
4168 ; CHECK-NEXT: kandw %k2, %k1, %k1
4169 ; CHECK-NEXT: korw %k1, %k0, %k0
4170 ; CHECK-NEXT: kmovw %k0, %eax
4171 ; CHECK-NEXT: shlb $7, %al
4172 ; CHECK-NEXT: sarb $7, %al
4175 %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
4176 %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
4177 %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
4178 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
4180 %res11 = or i8 %res1, %res2
4181 %res12 = or i8 %res3, %res4
4182 %res13 = or i8 %res11, %res12
; Fix: the function body had no terminator (invalid LLVM IR) — restore the return of the OR'd result.
4183 ret i8 %res13
; Masked vcmpss (predicate 3 = unord, current rounding): mask folded into %k1, single
; result bit sign-extended to i8 via shl/sar by 7.
4186 declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
4188 define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
4189 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
4191 ; CHECK-NEXT: andl $1, %edi
4192 ; CHECK-NEXT: kmovw %edi, %k1
4193 ; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
4194 ; CHECK-NEXT: kmovw %k0, %eax
4195 ; CHECK-NEXT: shlb $7, %al
4196 ; CHECK-NEXT: sarb $7, %al
4199 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
; Fix: the function body had no terminator (invalid LLVM IR) — restore the return of the compare result.
4200 ret i8 %res2
; vcmpss with predicates 2/3/4/5, masked and unmasked, {sae} and current rounding;
; the i8 results are AND'd together.
4204 define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
4205 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
4207 ; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1
4208 ; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1}
4209 ; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k1
4210 ; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
4211 ; CHECK-NEXT: andl $1, %edi
4212 ; CHECK-NEXT: kmovw %edi, %k2
4213 ; CHECK-NEXT: kandw %k2, %k1, %k1
4214 ; CHECK-NEXT: kandw %k1, %k0, %k0
4215 ; CHECK-NEXT: kmovw %k0, %eax
4216 ; CHECK-NEXT: shlb $7, %al
4217 ; CHECK-NEXT: sarb $7, %al
4219 %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
4220 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
4221 %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
4222 %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
4224 %res11 = and i8 %res1, %res2
4225 %res12 = and i8 %res3, %res4
4226 %res13 = and i8 %res11, %res12
; Fix: the function body had no terminator (invalid LLVM IR) — restore the return of the AND'd result.
4227 ret i8 %res13
; vshuff32x4 with immediate 22: masked and unmasked forms; the shuffle-mask annotations
; on the CHECK lines document the expected lane selection.
4230 declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
4232 define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
4233 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
4235 ; CHECK-NEXT: kmovw %edi, %k1
4236 ; CHECK-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1}
4237 ; CHECK-NEXT: ## zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4238 ; CHECK-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm0
4239 ; CHECK-NEXT: ## zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4240 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4242 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
4243 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
4244 %res2 = fadd <16 x float> %res, %res1
4245 ret <16 x float> %res2
; vshuff64x2 with immediate 22: merge-masked, zero-masked ({z}), and unmasked forms;
; all three results summed so each variant must be emitted.
4248 declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
4250 define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
4251 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
4253 ; CHECK-NEXT: movzbl %dil, %eax
4254 ; CHECK-NEXT: kmovw %eax, %k1
4255 ; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1}
4256 ; CHECK-NEXT: ## zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4257 ; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm3 {%k1} {z}
4258 ; CHECK-NEXT: ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4259 ; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0
4260 ; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4261 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4262 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
4264 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
4265 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
4266 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
4268 %res3 = fadd <8 x double> %res, %res1
4269 %res4 = fadd <8 x double> %res3, %res2
4270 ret <8 x double> %res4
; vshufi32x4 with immediate 22: masked and unmasked integer-lane shuffle, results added.
4273 declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
4275 define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
4276 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
4278 ; CHECK-NEXT: kmovw %edi, %k1
4279 ; CHECK-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1}
4280 ; CHECK-NEXT: ## zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4281 ; CHECK-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm0
4282 ; CHECK-NEXT: ## zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
4283 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4285 %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
4286 %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
4287 %res2 = add <16 x i32> %res, %res1
4288 ret <16 x i32> %res2
; vshufi64x2 with immediate 22: masked and unmasked 64-bit lane shuffle, results added.
4291 declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
4293 define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
4294 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
4296 ; CHECK-NEXT: movzbl %dil, %eax
4297 ; CHECK-NEXT: kmovw %eax, %k1
4298 ; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1}
4299 ; CHECK-NEXT: ## zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4300 ; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm0
4301 ; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
4302 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4304 %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
4305 %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
4306 %res2 = add <8 x i64> %res, %res1
; Fix: the function body had no terminator (invalid LLVM IR) — restore the return of the summed result.
4307 ret <8 x i64> %res2
; vgetmantpd with immediate 11: masked call with current rounding (i32 4) and
; unmasked call with {sae} (i32 8); results summed.
4310 declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
4312 define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
4313 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512:
4315 ; CHECK-NEXT: movzbl %dil, %eax
4316 ; CHECK-NEXT: kmovw %eax, %k1
4317 ; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1}
4318 ; CHECK-NEXT: vgetmantpd $11,{sae}, %zmm0, %zmm0
4319 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4321 %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
4322 %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
4323 %res2 = fadd <8 x double> %res, %res1
4324 ret <8 x double> %res2
; vgetmantps with immediate 11: masked current-rounding call and unmasked {sae} call; summed.
4327 declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
4329 define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
4330 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512:
4332 ; CHECK-NEXT: kmovw %edi, %k1
4333 ; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1}
4334 ; CHECK-NEXT: vgetmantps $11,{sae}, %zmm0, %zmm0
4335 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4337 %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
4338 %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
4339 %res2 = fadd <16 x float> %res, %res1
4340 ret <16 x float> %res2
; Scalar vgetmantsd: merge-masked, zero-masked, unmasked, and masked-{sae} variants;
; the four results are summed so all four encodings must be emitted.
4343 declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)
4345 define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
4346 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
4348 ; CHECK-NEXT: andl $1, %edi
4349 ; CHECK-NEXT: kmovw %edi, %k1
4350 ; CHECK-NEXT: vmovaps %zmm2, %zmm3
4351 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
4352 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
4353 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
4354 ; CHECK-NEXT: vgetmantsd $11,{sae}, %xmm1, %xmm0, %xmm2 {%k1}
4355 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
4356 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
4357 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
4359 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
4360 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
4361 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8)
4362 %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4)
4363 %res11 = fadd <2 x double> %res, %res1
4364 %res12 = fadd <2 x double> %res2, %res3
4365 %res13 = fadd <2 x double> %res11, %res12
4366 ret <2 x double> %res13
; Scalar vgetmantss: merge-masked, zero-masked, unmasked, and unmasked-{sae} variants; summed.
4369 declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)
4371 define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
4372 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
4374 ; CHECK-NEXT: andl $1, %edi
4375 ; CHECK-NEXT: kmovw %edi, %k1
4376 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
4377 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
4378 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4
4379 ; CHECK-NEXT: vgetmantss $11,{sae}, %xmm1, %xmm0, %xmm0
4380 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
4381 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
4382 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4384 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
4385 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
4386 %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8)
4387 %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4)
4388 %res11 = fadd <4 x float> %res, %res1
4389 %res12 = fadd <4 x float> %res2, %res3
4390 %res13 = fadd <4 x float> %res11, %res12
4391 ret <4 x float> %res13
; vshufpd with immediate 22: merge-masked, zero-masked, and unmasked forms; all three summed.
4394 declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
4396 define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
4397 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
4399 ; CHECK-NEXT: movzbl %dil, %eax
4400 ; CHECK-NEXT: kmovw %eax, %k1
4401 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1}
4402 ; CHECK-NEXT: ## zmm2 = zmm2[0],k1[1],zmm2[3],k1[2],zmm2[5],k1[4],zmm2[6],k1[6]
4403 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm3 {%k1} {z}
4404 ; CHECK-NEXT: ## zmm3 = k1[0],zmm0[1],k1[3],zmm0[2],k1[5],zmm0[4],k1[6],zmm0[6]
4405 ; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0
4406 ; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
4407 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4408 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
4410 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
4411 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
4412 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
4414 %res3 = fadd <8 x double> %res, %res1
4415 %res4 = fadd <8 x double> %res3, %res2
4416 ret <8 x double> %res4
; vshufps with immediate 22: masked and unmasked element shuffle, results summed.
4419 declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
4421 define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
4422 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
4424 ; CHECK-NEXT: kmovw %edi, %k1
4425 ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1}
4426 ; CHECK-NEXT: ## zmm2 = zmm2[2,1],k1[1,0],zmm2[6,5],k1[5,4],zmm2[10,9],k1[9,8],zmm2[14,13],k1[13,12]
4427 ; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm0
4428 ; CHECK-NEXT: ## zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
4429 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4431 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
4432 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
4433 %res2 = fadd <16 x float> %res, %res1
4434 ret <16 x float> %res2
; vpermilpd (immediate form, $22): merge-masked, zero-masked, and unmasked variants; summed.
4437 declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)
4439 define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
4440 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
4442 ; CHECK-NEXT: movzbl %dil, %eax
4443 ; CHECK-NEXT: kmovw %eax, %k1
4444 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm1 {%k1}
4445 ; CHECK-NEXT: ## zmm1 = zmm1[0,1,3,2,5,4,6,6]
4446 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm2 {%k1} {z}
4447 ; CHECK-NEXT: ## zmm2 = k1[0,1,3,2,5,4,6,6]
4448 ; CHECK-NEXT: vpermilpd $22, %zmm0, %zmm0
4449 ; CHECK-NEXT: ## zmm0 = zmm0[0,1,3,2,5,4,6,6]
4450 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
4451 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4453 %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
4454 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
4455 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
4456 %res3 = fadd <8 x double> %res, %res1
4457 %res4 = fadd <8 x double> %res3, %res2
4458 ret <8 x double> %res4
; vpermilps (immediate form, $22): merge-masked, zero-masked, and unmasked variants; summed.
4461 declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)
4463 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
4464 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
4466 ; CHECK-NEXT: kmovw %edi, %k1
4467 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm1 {%k1}
4468 ; CHECK-NEXT: ## zmm1 = zmm1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
4469 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm2 {%k1} {z}
4470 ; CHECK-NEXT: ## zmm2 = k1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
4471 ; CHECK-NEXT: vpermilps $22, %zmm0, %zmm0
4472 ; CHECK-NEXT: ## zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
4473 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
4474 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4476 %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
4477 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
4478 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
4479 %res3 = fadd <16 x float> %res, %res1
4480 %res4 = fadd <16 x float> %res3, %res2
4481 ret <16 x float> %res4
; vpermilpd (variable-control form, control in %zmm1): merge-masked, zero-masked,
; and unmasked variants; all three summed.
4484 declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
4486 define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
4487 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
4489 ; CHECK-NEXT: movzbl %dil, %eax
4490 ; CHECK-NEXT: kmovw %eax, %k1
4491 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
4492 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
4493 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
4494 ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
4495 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
4497 %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
4498 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
4499 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
4500 %res3 = fadd <8 x double> %res, %res1
4501 %res4 = fadd <8 x double> %res2, %res3
4502 ret <8 x double> %res4
; vpermilps (variable-control form): merge-masked, zero-masked, and unmasked variants; summed.
4505 declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
4507 define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
4508 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
4510 ; CHECK-NEXT: kmovw %edi, %k1
4511 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
4512 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
4513 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
4514 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
4515 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
4517 %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
4518 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
4519 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
4520 %res3 = fadd <16 x float> %res, %res1
4521 %res4 = fadd <16 x float> %res2, %res3
4522 ret <16 x float> %res4
; vinsertf32x4 $1 (insert 128-bit lane into slot 1): merge-masked, zero-masked, and
; unmasked variants; summed.
4525 declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8)
4527 define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) {
4528 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
4530 ; CHECK-NEXT: kmovw %edi, %k1
4531 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
4532 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
4533 ; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
4534 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4535 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
4537 %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 %x4)
4538 %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 -1)
4539 %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i8 %x4)
4540 %res3 = fadd <16 x float> %res, %res1
4541 %res4 = fadd <16 x float> %res2, %res3
4542 ret <16 x float> %res4
; vinserti32x4 $1 (integer lane insert): merge-masked, zero-masked, and unmasked variants; summed.
4545 declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i8)
4547 define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i8 %x4) {
4548 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
4550 ; CHECK-NEXT: kmovw %edi, %k1
4551 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
4552 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
4553 ; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
4554 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4555 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4557 %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 %x4)
4558 %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 -1)
4559 %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i8 %x4)
4560 %res3 = add <16 x i32> %res, %res1
4561 %res4 = add <16 x i32> %res2, %res3
4562 ret <16 x i32> %res4
; vinsertf64x4 $1 (insert 256-bit lane into upper half): merge-masked, zero-masked,
; and unmasked variants; summed.
4565 declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
4567 define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
4568 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
4570 ; CHECK-NEXT: movzbl %dil, %eax
4571 ; CHECK-NEXT: kmovw %eax, %k1
4572 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
4573 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
4574 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
4575 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4576 ; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
4578 %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
4579 %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
4580 %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
4581 %res3 = fadd <8 x double> %res, %res1
4582 %res4 = fadd <8 x double> %res2, %res3
4583 ret <8 x double> %res4
; vinserti64x4 $1 (integer 256-bit lane insert): merge-masked, zero-masked, and
; unmasked variants; summed.
4586 declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
4588 define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
4589 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
4591 ; CHECK-NEXT: movzbl %dil, %eax
4592 ; CHECK-NEXT: kmovw %eax, %k1
4593 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
4594 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
4595 ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
4596 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4597 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4599 %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
4600 %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
4601 %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
4602 %res3 = add <8 x i64> %res, %res1
4603 %res4 = add <8 x i64> %res2, %res3
; Fix: the function body had no terminator (invalid LLVM IR) — restore the return of the summed result.
4604 ret <8 x i64> %res4
4607 declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float>, <4 x float>, <2 x double>, i8, i32)
; Masked scalar float->double conversion with explicit rounding argument:
; one merge-masked call with rounding arg 4 (default/current direction) and
; one unmasked call with arg 8, which maps to the {sae} form in the output.
; The two <2 x double> results are combined with fadd, so the final checked
; instruction is vaddpd (the original check line was missing the mnemonic,
; leaving the match under-constrained).
4609 define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<4 x float> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
4610 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
4611 ; CHECK: kmovw %edi, %k1
4612 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
4613 ; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
4614 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
4616 %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
4617 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
4618 %res2 = fadd <2 x double> %res, %res1
4619 ret <2 x double> %res2
4622 declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double>, <2 x double>, <4 x float>, i8, i32)
; Masked scalar double->float conversion with explicit rounding argument:
; merge-masked with rounding arg 3 (shown as {rz-sae}) and unmasked with
; arg 8 (shown as {rn-sae}); the <4 x float> results are combined with fadd
; (vaddps in the checked output).
4624 define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
4625 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
4626 ; CHECK: kmovw %edi, %k1
4627 ; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
4628 ; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
4629 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
4631 %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
4632 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
4633 %res2 = fadd <4 x float> %res, %res1
4634 ret <4 x float> %res2
4637 declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
; vpternlogd with immediate truth table $33, merge-masked vs unmasked.
; pternlog writes its first source in place, so the input %zmm0 is copied to
; %zmm3 first to keep both the masked and unmasked results; they are then
; combined with add (vpaddd).
4639 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
4640 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
4642 ; CHECK-NEXT: kmovw %edi, %k1
4643 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4644 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
4645 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
4646 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4648 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
4649 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
4650 %res2 = add <16 x i32> %res, %res1
4651 ret <16 x i32> %res2
4654 declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
; Zero-masking variant of the pternlog.d test: the masked call must emit the
; {%k1} {z} form; otherwise identical structure to the merge-masked test
; above (copy %zmm0 to %zmm3, run masked and unmasked, add the results).
4656 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
4657 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
4659 ; CHECK-NEXT: kmovw %edi, %k1
4660 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4661 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
4662 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
4663 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4665 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
4666 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
4667 %res2 = add <16 x i32> %res, %res1
4668 ret <16 x i32> %res2
4671 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
; 64-bit-element counterpart of the pternlog.d test: vpternlogq $33,
; merge-masked vs unmasked, results combined with add (vpaddq). The i8 mask
; is zero-extended from %dil into %k1 (movzbl + kmovw).
4673 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
4674 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
4676 ; CHECK-NEXT: movzbl %dil, %eax
4677 ; CHECK-NEXT: kmovw %eax, %k1
4678 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4679 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
4680 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
4681 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4683 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
4684 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
4685 %res2 = add <8 x i64> %res, %res1
4689 declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
; Zero-masking variant of the pternlog.q test: the masked vpternlogq must
; carry {%k1} {z}; structure otherwise matches the merge-masked q test
; (copy to %zmm3, masked + unmasked calls, add the results).
4691 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
4692 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
4694 ; CHECK-NEXT: movzbl %dil, %eax
4695 ; CHECK-NEXT: kmovw %eax, %k1
4696 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4697 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
4698 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
4699 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4701 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
4702 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
4703 %res2 = add <8 x i64> %res, %res1
4707 declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)
; vmovsldup (duplicate even-indexed floats) in merge-masked, zero-masked, and
; unmasked forms; the shuffle-pattern comment lines verify the element
; mapping [0,0,2,2,...,14,14]. Results are combined with fadd (vaddps).
4709 define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
4710 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512:
4712 ; CHECK-NEXT: kmovw %edi, %k1
4713 ; CHECK-NEXT: vmovsldup %zmm0, %zmm1 {%k1}
4714 ; CHECK-NEXT: ## zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
4715 ; CHECK-NEXT: vmovsldup %zmm0, %zmm2 {%k1} {z}
4716 ; CHECK-NEXT: ## zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
4717 ; CHECK-NEXT: vmovsldup %zmm0, %zmm0
4718 ; CHECK-NEXT: ## zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
4719 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4720 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4722 %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
4723 %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
4724 %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
4725 %res3 = fadd <16 x float> %res, %res1
4726 %res4 = fadd <16 x float> %res2, %res3
4727 ret <16 x float> %res4
4730 declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)
; vmovshdup (duplicate odd-indexed floats) in merge-masked, zero-masked, and
; unmasked forms; the shuffle-pattern comment lines verify the element
; mapping [1,1,3,3,...,15,15]. Results are combined with fadd (vaddps).
4732 define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
4733 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512:
4735 ; CHECK-NEXT: kmovw %edi, %k1
4736 ; CHECK-NEXT: vmovshdup %zmm0, %zmm1 {%k1}
4737 ; CHECK-NEXT: ## zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
4738 ; CHECK-NEXT: vmovshdup %zmm0, %zmm2 {%k1} {z}
4739 ; CHECK-NEXT: ## zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
4740 ; CHECK-NEXT: vmovshdup %zmm0, %zmm0
4741 ; CHECK-NEXT: ## zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
4742 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4743 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4745 %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
4746 %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
4747 %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
4748 %res3 = fadd <16 x float> %res, %res1
4749 %res4 = fadd <16 x float> %res2, %res3
4750 ret <16 x float> %res4