1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
3 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
4 ; CHECK-LABEL: test_kortestz
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
12 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
13 ; CHECK-LABEL: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
21 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
22 ; CHECK-LABEL: test_kand
25 define i16 @test_kand(i16 %a0, i16 %a1) {
26 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
27 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
31 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
32 ; CHECK-LABEL: test_knot
34 define i16 @test_knot(i16 %a0) {
35 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
39 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
41 ; CHECK-LABEL: unpckbw_test
44 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
45 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
49 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
50 ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
51 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
54 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
56 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
57 ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
58 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
61 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
63 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
65 define <8 x double> @test7(<8 x double> %a) {
66 ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
67 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
71 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
73 define <16 x float> @test8(<16 x float> %a) {
74 ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
75 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
79 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
80 ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
81 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
84 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
86 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
87 ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
94 ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
100 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
102 %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1]
103 ret <8 x double> %res
105 declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
107 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
109 %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1]
110 ret <16 x float> %res
112 declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
114 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
115 ; CHECK: vsqrtss {{.*}}encoding: [0x62
116 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
119 declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
121 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
122 ; CHECK: vsqrtsd {{.*}}encoding: [0x62
123 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
124 ret <2 x double> %res
126 declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
128 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
129 ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
130 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
133 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
135 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
136 ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
137 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
138 ret <2 x double> %res
140 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
142 define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) {
143 ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62
144 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
145 ret <2 x double> %res
147 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone
149 define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
150 ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
151 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
154 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
157 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
158 ; CHECK: vcvtss2si {{.*}}encoding: [0x62
159 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
162 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
165 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
166 ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
167 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
170 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
173 define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
174 ; CHECK: vcvttss2si {{.*}}encoding: [0x62
175 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
178 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
180 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
181 ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
182 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
185 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
187 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
188 ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
189 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
190 ret <16 x float> %res
192 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
195 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
196 ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
197 %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
201 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
203 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
204 ; CHECK: vbroadcastss
205 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
206 ret <16 x float> %res
208 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
210 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
211 ; CHECK: vbroadcastsd
212 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
213 ret <8 x double> %res
215 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
217 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
218 ; CHECK: vbroadcastss
219 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
220 ret <16 x float> %res
222 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
224 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
225 ; CHECK: vbroadcastsd
226 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
227 ret <8 x double> %res
229 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
231 define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
232 ; CHECK: vpbroadcastd
233 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
236 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
238 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
239 ; CHECK: vpbroadcastd
240 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
243 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
245 define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
246 ; CHECK: vpbroadcastq
247 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
250 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
252 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
253 ; CHECK: vpbroadcastq
254 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
257 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
259 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
260 ; CHECK: movw $-1, %ax
263 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
267 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
269 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
270 ; CHECK: movb $-1, %al
273 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
277 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
279 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
281 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
285 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
287 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
291 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
292 ; CHECK: movw $-1, %ax
295 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
299 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
301 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
302 ; CHECK: movb $-1, %al
305 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
309 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
312 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
314 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
318 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
320 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
324 define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
325 ; CHECK-LABEL: test_ctlz_d
327 %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
331 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
333 define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
334 ; CHECK-LABEL: test_ctlz_q
336 %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
340 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
342 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
343 ; CHECK: vblendmps %zmm1, %zmm0
344 %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
345 ret <16 x float> %res
348 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
350 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
351 ; CHECK: vblendmpd %zmm1, %zmm0
352 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
353 ret <8 x double> %res
356 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
357 ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
358 ; CHECK: vblendmpd (%
359 %b = load <8 x double>* %ptr
360 %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
361 ret <8 x double> %res
363 declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
365 define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
367 %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
370 declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
372 define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
374 %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
377 declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
379 define <8 x i32> @test_cvtpd2udq(<8 x double> %a) {
380 ;CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0]
381 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32>zeroinitializer, i8 -1, i32 2)
384 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
386 define <16 x i32> @test_cvtps2udq(<16 x float> %a) {
387 ;CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0]
388 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32>zeroinitializer, i16 -1, i32 1)
391 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
393 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
394 ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
395 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
398 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
400 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
401 ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
402 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
405 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
408 define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
409 ;CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0]
410 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
413 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
415 define <16 x float> @test_cvtudq2ps(<16 x i32> %a) {
416 ;CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0]
417 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float>zeroinitializer, i16 -1, i32 1)
420 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
422 define <8 x double> @test_cvtdq2pd(<8 x i32> %a) {
423 ;CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
424 %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
427 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
429 define <8 x double> @test_cvtudq2pd(<8 x i32> %a) {
430 ;CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
431 %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double>zeroinitializer, i8 -1)
434 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
437 define <16 x float> @test_vmaxps(<16 x float> %a0, <16 x float> %a1) {
439 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1,
440 <16 x float>zeroinitializer, i16 -1, i32 4)
441 ret <16 x float> %res
443 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>,
444 <16 x float>, i16, i32)
446 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
448 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
449 <8 x double>zeroinitializer, i8 -1, i32 4)
450 ret <8 x double> %res
452 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
453 <8 x double>, i8, i32)
455 define <16 x float> @test_vminps(<16 x float> %a0, <16 x float> %a1) {
457 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1,
458 <16 x float>zeroinitializer, i16 -1, i32 4)
459 ret <16 x float> %res
461 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>,
462 <16 x float>, i16, i32)
464 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
466 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
467 <8 x double>zeroinitializer, i8 -1, i32 4)
468 ret <8 x double> %res
470 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
471 <8 x double>, i8, i32)
473 define <8 x float> @test_cvtpd2ps(<8 x double> %a) {
474 ;CHECK: vcvtpd2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0xfd,0x38,0x5a,0xc0]
475 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %a, <8 x float>zeroinitializer, i8 -1, i32 1)
478 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
480 define <16 x i32> @test_pabsd(<16 x i32> %a) {
481 ;CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
482 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32>zeroinitializer, i16 -1)
485 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
487 define <8 x i64> @test_pabsq(<8 x i64> %a) {
488 ;CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
489 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64>zeroinitializer, i8 -1)
492 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
494 define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
495 ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
496 %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
497 <8 x i64>zeroinitializer, i8 -1)
500 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
502 define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
503 ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
504 %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
505 <16 x i32>zeroinitializer, i16 -1)
508 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
510 define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
511 ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
512 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
513 <16 x i32>zeroinitializer, i16 -1)
516 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
518 define <8 x i64> @test_vpmuludq(<16 x i32> %a0, <16 x i32> %a1) {
519 ; CHECK: vpmuludq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
520 %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a0, <16 x i32> %a1,
521 <8 x i64>zeroinitializer, i8 -1)
524 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
526 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
527 ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
528 %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
531 declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
533 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
534 ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
535 %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
538 declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
540 define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
541 ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
542 call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
546 declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
548 define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
549 ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
550 call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
554 declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
556 define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
557 ; CHECK-LABEL: test_mask_store_aligned_ps:
559 ; CHECK-NEXT: kmovw %esi, %k1
560 ; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
562 call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
566 declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
568 define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
569 ; CHECK-LABEL: test_mask_store_aligned_pd:
571 ; CHECK-NEXT: kmovw %esi, %k1
572 ; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
574 call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
578 declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
580 define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
581 ; CHECK-LABEL: test_maskz_load_aligned_ps:
583 ; CHECK-NEXT: kmovw %esi, %k1
584 ; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
586 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
587 ret <16 x float> %res
590 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
592 define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
593 ; CHECK-LABEL: test_maskz_load_aligned_pd:
595 ; CHECK-NEXT: kmovw %esi, %k1
596 ; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
598 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
599 ret <8 x double> %res
602 declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
604 define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
605 ; CHECK-LABEL: test_load_aligned_ps:
607 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
609 %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
610 ret <16 x float> %res
613 define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
614 ; CHECK-LABEL: test_load_aligned_pd:
616 ; CHECK-NEXT: vmovapd (%rdi), %zmm0
618 %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
619 ret <8 x double> %res
622 define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) {
623 ; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]
624 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 -1)
625 ret <16 x float> %res
628 define <16 x float> @test_vpermt2ps_mask(<16 x float>%x, <16 x float>%y, <16 x i32>%perm, i16 %mask) {
629 ; CHECK-LABEL: test_vpermt2ps_mask:
630 ; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1]
631 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 %mask)
632 ret <16 x float> %res
635 declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
637 define <8 x i64> @test_vmovntdqa(i8 *%x) {
638 ; CHECK-LABEL: test_vmovntdqa:
639 ; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
640 %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x)
644 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
646 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
647 ; CHECK-LABEL: test_valign_q:
648 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
649 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
653 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
654 ; CHECK-LABEL: test_mask_valign_q:
655 ; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
656 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
660 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
662 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
663 ; CHECK-LABEL: test_maskz_valign_d:
664 ; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
665 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
669 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)
671 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
672 ; CHECK-LABEL: test_mask_store_ss
673 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
674 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
678 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
680 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
681 ; CHECK-LABEL: test_pcmpeq_d
682 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
683 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
687 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
688 ; CHECK-LABEL: test_mask_pcmpeq_d
689 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
690 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
694 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
696 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
697 ; CHECK-LABEL: test_pcmpeq_q
698 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
699 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
703 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
704 ; CHECK-LABEL: test_mask_pcmpeq_q
705 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
706 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
710 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
712 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
713 ; CHECK-LABEL: test_pcmpgt_d
714 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
715 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
719 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
720 ; CHECK-LABEL: test_mask_pcmpgt_d
721 ; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
722 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
726 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
728 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
729 ; CHECK-LABEL: test_pcmpgt_q
730 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
731 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
735 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
736 ; CHECK-LABEL: test_mask_pcmpgt_q
737 ; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
738 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
742 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
744 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
745 ; CHECK-LABEL: test_cmp_d_512
746 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
747 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
748 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
749 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
750 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
751 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
752 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
753 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
754 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
755 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
756 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
757 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
758 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
759 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
760 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
761 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
762 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
763 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
764 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
765 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
766 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
767 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
768 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
769 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
773 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
774 ; CHECK-LABEL: test_mask_cmp_d_512
775 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
776 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
777 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
778 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
779 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
780 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
781 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
782 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
783 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
784 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
785 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
786 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
787 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
788 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
789 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
790 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
791 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
792 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
793 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
794 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
795 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
796 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
797 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
798 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
802 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
; Unmasked dword ucmp: calls llvm.x86.avx512.mask.ucmp.d.512 once for each
; predicate immediate 0..7 with an all-ones mask (i16 -1). The expected
; mnemonics for immediates 0..7 are the "u" forms of eq, lt, le, unord, neq,
; nlt, nle, ord, with no write-mask annotation. Every i16 result is inserted
; into its own lane of an <8 x i16> vector.
804 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
805 ; CHECK-LABEL: test_ucmp_d_512
806 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
807 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
808 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
809 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
810 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
811 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
812 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
813 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
814 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
815 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
816 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
817 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
818 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
819 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
820 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
821 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
822 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
823 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
824 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
825 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
826 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
827 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
828 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
829 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
; Masked dword ucmp: calls llvm.x86.avx512.mask.ucmp.d.512 once for each
; predicate immediate 0..7 with the caller-supplied %mask. The expected
; mnemonics for immediates 0..7 are the "u" forms of eq, lt, le, unord, neq,
; nlt, nle, ord, each predicated on {%k1}. Every i16 result is inserted into
; its own lane of an <8 x i16> vector.
833 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
834 ; CHECK-LABEL: test_mask_ucmp_d_512
835 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
836 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
837 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
838 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
839 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
840 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
841 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
842 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
843 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
844 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
845 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
846 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
847 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
848 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
849 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
850 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
851 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
852 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
853 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
854 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
855 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
856 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
857 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
858 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
862 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
; Unmasked qword compare: calls llvm.x86.avx512.mask.cmp.q.512 once for each
; predicate immediate 0..7 with an all-ones mask (i8 -1). The expected
; mnemonics for immediates 0..7 are eq, lt, le, unord, neq, nlt, nle, ord, with
; no write-mask annotation. Every i8 result is inserted into its own lane of an
; <8 x i8> vector.
864 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
865 ; CHECK-LABEL: test_cmp_q_512
866 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
867 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
868 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
869 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
870 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
871 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
872 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
873 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
874 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
875 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
876 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
877 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
878 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
879 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
880 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
881 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
882 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
883 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
884 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
885 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
886 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
887 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
888 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
889 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
; Masked qword compare: calls llvm.x86.avx512.mask.cmp.q.512 once for each
; predicate immediate 0..7 with the caller-supplied %mask. The expected
; mnemonics for immediates 0..7 are eq, lt, le, unord, neq, nlt, nle, ord, each
; predicated on {%k1}. Every i8 result is inserted into its own lane of an
; <8 x i8> vector.
893 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
894 ; CHECK-LABEL: test_mask_cmp_q_512
895 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
896 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
897 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
898 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
899 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
900 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
901 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
902 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
903 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
904 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
905 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
906 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
907 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
908 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
909 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
910 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
911 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
912 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
913 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
914 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
915 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
916 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
917 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
918 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
922 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
; Unmasked qword ucmp: calls llvm.x86.avx512.mask.ucmp.q.512 once for each
; predicate immediate 0..7 with an all-ones mask (i8 -1). The expected
; mnemonics for immediates 0..7 are the "u" forms of eq, lt, le, unord, neq,
; nlt, nle, ord, with no write-mask annotation. Every i8 result is inserted
; into its own lane of an <8 x i8> vector.
924 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
925 ; CHECK-LABEL: test_ucmp_q_512
926 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
927 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
928 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
929 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
930 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
931 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
932 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
933 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
934 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
935 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
936 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
937 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
938 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
939 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
940 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
941 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
942 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
943 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
944 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
945 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
946 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
947 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
948 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
949 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
; Masked qword ucmp: calls llvm.x86.avx512.mask.ucmp.q.512 once for each
; predicate immediate 0..7 with the caller-supplied %mask. The expected
; mnemonics for immediates 0..7 are the "u" forms of eq, lt, le, unord, neq,
; nlt, nle, ord, each predicated on {%k1}. Every i8 result is inserted into its
; own lane of an <8 x i8> vector.
953 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
954 ; CHECK-LABEL: test_mask_ucmp_q_512
955 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
956 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
957 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
958 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
959 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
960 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
961 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
962 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
963 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
964 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
965 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
966 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
967 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
968 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
969 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
970 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
971 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
972 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
973 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
974 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
975 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
976 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
977 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
978 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
982 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
; Merge-masked extract: passes immediate 2, %b as the third operand and %mask
; as the mask; expects vextractf32x4 $2 from %zmm1 into %xmm0 under {%k1}.
984 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
985 ; CHECK-LABEL: test_mask_vextractf32x4:
986 ; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
987 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
991 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
; Merge-masked extract: passes immediate 2, %b as the third operand and %mask
; as the mask; expects vextracti64x4 $2 from %zmm1 into %ymm0 under {%k1}.
; NOTE(review): a 512-bit source holds only two 4 x i64 chunks, so an index of
; 2 looks out of range -- confirm the immediate is intentional.
993 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
994 ; CHECK-LABEL: test_mask_vextracti64x4:
995 ; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
996 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
1000 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
; Zero-masked extract: passes immediate 2, a zeroinitializer third operand and
; %mask; expects vextracti32x4 $2 from %zmm0 into %xmm0 with {%k1} {z}.
1002 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
1003 ; CHECK-LABEL: test_maskz_vextracti32x4:
1004 ; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
1005 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
1009 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
; Unmasked extract: passes immediate 2, a zeroinitializer third operand and an
; all-ones mask (i8 -1); expects vextractf64x4 $2 from %zmm0 into %ymm0 with no
; write-mask annotation.
; NOTE(review): a 512-bit source holds only two 4 x double chunks, so an index
; of 2 looks out of range -- confirm the immediate is intentional.
1011 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
1012 ; CHECK-LABEL: test_vextractf64x4:
1013 ; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
1014 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
1015 ret <4 x double> %res
1018 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
; Unmasked case of mask.pslli.d: shift count 7, zeroinitializer third operand,
; all-ones mask (i16 -1).
1020 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
1021 ; CHECK-LABEL: test_x86_avx512_pslli_d
1023 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
; Merge-masked case of mask.pslli.d: %a1 is the third operand and %mask the
; mask; expects vpslld $7 writing %zmm1 under {%k1}.
1027 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1028 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
1029 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
1030 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
; Zero-masked case of mask.pslli.d: zeroinitializer third operand with %mask;
; expects vpslld $7 with {%k1} {z}.
1034 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
1035 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
1036 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
1037 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1041 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
; Unmasked case of mask.pslli.q: shift count 7, zeroinitializer third operand,
; all-ones mask (i8 -1).
1043 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
1044 ; CHECK-LABEL: test_x86_avx512_pslli_q
1046 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
; Merge-masked case of mask.pslli.q: %a1 is the third operand and %mask the
; mask; expects vpsllq $7 writing %zmm1 under {%k1}.
1050 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1051 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
1052 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
1053 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
; Zero-masked case of mask.pslli.q: zeroinitializer third operand with %mask;
; expects vpsllq $7 with {%k1} {z}.
1057 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
1058 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
1059 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
1060 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1064 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
; Unmasked case of mask.psrli.d: shift count 7, zeroinitializer third operand,
; all-ones mask (i16 -1).
1066 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1067 ; CHECK-LABEL: test_x86_avx512_psrli_d
1069 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
; Merge-masked case of mask.psrli.d: %a1 is the third operand and %mask the
; mask; expects vpsrld $7 writing %zmm1 under {%k1}.
1073 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1074 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
1075 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
1076 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
; Zero-masked case of mask.psrli.d: zeroinitializer third operand with %mask;
; expects vpsrld $7 with {%k1} {z}.
1080 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1081 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1082 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1083 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1087 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
; Unmasked case of mask.psrli.q: shift count 7, zeroinitializer third operand,
; all-ones mask (i8 -1).
1089 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1090 ; CHECK-LABEL: test_x86_avx512_psrli_q
1092 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
; Merge-masked case of mask.psrli.q: %a1 is the third operand and %mask the
; mask; expects vpsrlq $7 writing %zmm1 under {%k1}.
1096 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1097 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1098 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1099 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
; Zero-masked case of mask.psrli.q: zeroinitializer third operand with %mask;
; expects vpsrlq $7 with {%k1} {z}.
1103 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1104 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1105 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1106 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1110 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
; Unmasked case of mask.psrai.d: shift count 7, zeroinitializer third operand,
; all-ones mask (i16 -1).
1112 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1113 ; CHECK-LABEL: test_x86_avx512_psrai_d
1115 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
; Merge-masked case of mask.psrai.d: %a1 is the third operand and %mask the
; mask; expects vpsrad $7 writing %zmm1 under {%k1}.
1119 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1120 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1121 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1122 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
; Zero-masked case of mask.psrai.d: zeroinitializer third operand with %mask;
; expects vpsrad $7 with {%k1} {z}.
1126 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1127 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1128 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1129 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1133 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
; Unmasked case of mask.psrai.q: shift count 7, zeroinitializer third operand,
; all-ones mask (i8 -1).
1135 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1136 ; CHECK-LABEL: test_x86_avx512_psrai_q
1138 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
; Merge-masked case of mask.psrai.q: %a1 is the third operand and %mask the
; mask; expects vpsraq $7 writing %zmm1 under {%k1}.
1142 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1143 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1144 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1145 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
; Zero-masked case of mask.psrai.q: zeroinitializer third operand with %mask;
; expects vpsraq $7 with {%k1} {z}.
1149 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1150 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1151 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1152 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1156 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
; Unmasked case of mask.psll.d: count operand is the <4 x i32> %a1, third
; operand is zeroinitializer, mask is all-ones (i16 -1).
1158 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
1159 ; CHECK-LABEL: test_x86_avx512_psll_d
1161 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
; Merge-masked case of mask.psll.d: %a2 is the third operand and %mask the
; mask; expects vpslld with the count in %xmm1, writing %zmm2 under {%k1}.
1165 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1166 ; CHECK-LABEL: test_x86_avx512_mask_psll_d
1167 ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
1168 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
; Zero-masked case of mask.psll.d: zeroinitializer third operand with %mask;
; expects vpslld with the count in %xmm1 and {%k1} {z}.
1172 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1173 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
1174 ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
1175 %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1179 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
; Unmasked case of mask.psll.q: count operand is the <2 x i64> %a1, third
; operand is zeroinitializer, mask is all-ones (i8 -1).
1181 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
1182 ; CHECK-LABEL: test_x86_avx512_psll_q
1184 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
; Merge-masked case of mask.psll.q: %a2 is the third operand and %mask the
; mask; expects vpsllq with the count in %xmm1, writing %zmm2 under {%k1}.
1188 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1189 ; CHECK-LABEL: test_x86_avx512_mask_psll_q
1190 ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
1191 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
; Zero-masked case of mask.psll.q: zeroinitializer third operand with %mask;
; expects vpsllq with the count in %xmm1 and {%k1} {z}.
1195 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1196 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
1197 ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
1198 %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1202 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
; Unmasked case of mask.psrl.d: count operand is the <4 x i32> %a1, third
; operand is zeroinitializer, mask is all-ones (i16 -1).
1204 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
1205 ; CHECK-LABEL: test_x86_avx512_psrl_d
1207 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
; Merge-masked case of mask.psrl.d: %a2 is the third operand and %mask the
; mask; expects vpsrld with the count in %xmm1, writing %zmm2 under {%k1}.
1211 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1212 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
1213 ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
1214 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
; Zero-masked case of mask.psrl.d: zeroinitializer third operand with %mask;
; expects vpsrld with the count in %xmm1 and {%k1} {z}.
1218 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1219 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
1220 ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
1221 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1225 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
; Unmasked case of mask.psrl.q: count operand is the <2 x i64> %a1, third
; operand is zeroinitializer, mask is all-ones (i8 -1).
1227 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
1228 ; CHECK-LABEL: test_x86_avx512_psrl_q
1230 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
; Merge-masked case of mask.psrl.q: %a2 is the third operand and %mask the
; mask; expects vpsrlq with the count in %xmm1, writing %zmm2 under {%k1}.
1234 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1235 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
1236 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
1237 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
; Zero-masked case of mask.psrl.q: zeroinitializer third operand with %mask;
; expects vpsrlq with the count in %xmm1 and {%k1} {z}.
1241 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1242 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
1243 ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
1244 %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1248 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
; Unmasked case of mask.psra.d: count operand is the <4 x i32> %a1, third
; operand is zeroinitializer, mask is all-ones (i16 -1).
1250 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
1251 ; CHECK-LABEL: test_x86_avx512_psra_d
1253 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
; Merge-masked case of mask.psra.d: %a2 is the third operand and %mask the
; mask; expects vpsrad with the count in %xmm1, writing %zmm2 under {%k1}.
1257 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1258 ; CHECK-LABEL: test_x86_avx512_mask_psra_d
1259 ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
1260 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
; Zero-masked case of mask.psra.d: zeroinitializer third operand with %mask;
; expects vpsrad with the count in %xmm1 and {%k1} {z}.
1264 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
1265 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
1266 ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
1267 %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1271 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
; Unmasked case of mask.psra.q: count operand is the <2 x i64> %a1, third
; operand is zeroinitializer, mask is all-ones (i8 -1).
1273 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
1274 ; CHECK-LABEL: test_x86_avx512_psra_q
1276 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
; Merge-masked case of mask.psra.q: %a2 is the third operand and %mask the
; mask; expects vpsraq with the count in %xmm1, writing %zmm2 under {%k1}.
1280 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1281 ; CHECK-LABEL: test_x86_avx512_mask_psra_q
1282 ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
1283 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
; Zero-masked case of mask.psra.q: zeroinitializer third operand with %mask;
; expects vpsraq with the count in %xmm1 and {%k1} {z}.
1287 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
1288 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
1289 ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
1290 %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1294 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
; Unmasked case of mask.psllv.d: second operand is the <16 x i32> %a1, third
; operand is zeroinitializer, mask is all-ones (i16 -1).
1296 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
1297 ; CHECK-LABEL: test_x86_avx512_psllv_d
1299 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
; Merge-masked case of mask.psllv.d: %a2 is the third operand and %mask the
; mask; expects vpsllvd %zmm1, %zmm0 writing %zmm2 under {%k1}.
1303 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1304 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
1305 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
1306 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
; Zero-masked case of mask.psllv.d: zeroinitializer third operand with %mask;
; expects vpsllvd %zmm1, %zmm0 with {%k1} {z}.
1310 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1311 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
1312 ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1313 %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1317 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
; Unmasked case of mask.psllv.q: second operand is the <8 x i64> %a1, third
; operand is zeroinitializer, mask is all-ones (i8 -1).
1319 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
1320 ; CHECK-LABEL: test_x86_avx512_psllv_q
1322 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
; Merge-masked case of mask.psllv.q: %a2 is the third operand and %mask the
; mask; expects vpsllvq %zmm1, %zmm0 writing %zmm2 under {%k1}.
1326 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1327 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
1328 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
1329 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
; Zero-masked case of mask.psllv.q: zeroinitializer third operand with %mask;
; expects vpsllvq %zmm1, %zmm0 with {%k1} {z}.
1333 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1334 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
1335 ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1336 %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1340 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
; Unmasked case of mask.psrav.d: second operand is the <16 x i32> %a1, third
; operand is zeroinitializer, mask is all-ones (i16 -1).
1343 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
1344 ; CHECK-LABEL: test_x86_avx512_psrav_d
1346 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
; Merge-masked case of mask.psrav.d: %a2 is the third operand and %mask the
; mask; expects vpsravd %zmm1, %zmm0 writing %zmm2 under {%k1}.
1350 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1351 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
1352 ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
1353 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
; Zero-masked case of mask.psrav.d: zeroinitializer third operand with %mask;
; expects vpsravd %zmm1, %zmm0 with {%k1} {z}.
1357 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1358 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
1359 ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
1360 %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1364 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
; Unmasked case of mask.psrav.q: second operand is the <8 x i64> %a1, third
; operand is zeroinitializer, mask is all-ones (i8 -1).
1366 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
1367 ; CHECK-LABEL: test_x86_avx512_psrav_q
1369 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
; Merge-masked case of mask.psrav.q: %a2 is the third operand and %mask the
; mask; expects vpsravq %zmm1, %zmm0 writing %zmm2 under {%k1}.
1373 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1374 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
1375 ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
1376 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
; Zero-masked case of mask.psrav.q: zeroinitializer third operand with %mask;
; expects vpsravq %zmm1, %zmm0 with {%k1} {z}.
1380 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1381 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
1382 ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
1383 %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1387 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
; Unmasked case of mask.psrlv.d: second operand is the <16 x i32> %a1, third
; operand is zeroinitializer, mask is all-ones (i16 -1).
1389 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
1390 ; CHECK-LABEL: test_x86_avx512_psrlv_d
1392 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
; Merge-masked case of mask.psrlv.d: %a2 is the third operand and %mask the
; mask; expects vpsrlvd %zmm1, %zmm0 writing %zmm2 under {%k1}.
1396 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
1397 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
1398 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
1399 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
; Zero-masked case of mask.psrlv.d: zeroinitializer third operand with %mask;
; expects vpsrlvd %zmm1, %zmm0 with {%k1} {z}.
1403 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1404 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
1405 ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
1406 %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
1410 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
; Unmasked case of mask.psrlv.q: second operand is the <8 x i64> %a1, third
; operand is zeroinitializer, mask is all-ones (i8 -1).
1412 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
1413 ; CHECK-LABEL: test_x86_avx512_psrlv_q
1415 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
; Merge-masked case of mask.psrlv.q: %a2 is the third operand and %mask the
; mask; expects vpsrlvq %zmm1, %zmm0 writing %zmm2 under {%k1}.
1419 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
1420 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
1421 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
1422 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
; Zero-masked case of mask.psrlv.q: zeroinitializer third operand with %mask;
; expects vpsrlvq %zmm1, %zmm0 with {%k1} {z}.
1426 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1427 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
1428 ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
1429 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
1433 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
1435 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
1436 ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
1438 %b = load <8 x i64>* %ptr
1439 %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)