1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; test_kortestz: forwards both i16 arguments unchanged to
; @llvm.x86.avx512.kortestz, which is declared with an i32 result.
3 declare i32 @llvm.x86.avx512.kortestz(i16, i16) nounwind readnone
7 define i32 @test_kortestz(i16 %a0, i16 %a1) {
8 %res = call i32 @llvm.x86.avx512.kortestz(i16 %a0, i16 %a1)
; test_kortestc: forwards both i16 arguments unchanged to
; @llvm.x86.avx512.kortestc (i32 result). The FileCheck directive that follows
; the declaration matches on the function's name.
12 declare i32 @llvm.x86.avx512.kortestc(i16, i16) nounwind readnone
13 ; CHECK: test_kortestc
16 define i32 @test_kortestc(i16 %a0, i16 %a1) {
17 %res = call i32 @llvm.x86.avx512.kortestc(i16 %a0, i16 %a1)
; rcp14.ps.512: single <16 x float> operand, <16 x float> result.
21 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
23 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
26 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>) nounwind readnone
; rcp14.pd.512: single <8 x double> operand, <8 x double> result.
28 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
30 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
33 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>) nounwind readnone
; rcp28.ps.512: same operand/result types as the rcp14 single-precision test,
; but calls the rcp28 intrinsic.
35 define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
37 %res = call <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
40 declare <16 x float> @llvm.x86.avx512.rcp28.ps.512(<16 x float>) nounwind readnone
; rcp28.pd.512: same operand/result types as the rcp14 double-precision test,
; but calls the rcp28 intrinsic.
42 define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
44 %res = call <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
47 declare <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double>) nounwind readnone
; rndscale.pd.512: <8 x double> operand plus a constant i32 7 as the second
; argument.
49 define <8 x double> @test_rndscale_pd_512(<8 x double> %a0) {
51 %res = call <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double> %a0, i32 7) ; <<8 x double>> [#uses=1]
54 declare <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double>, i32) nounwind readnone
; rndscale.ps.512: <16 x float> operand plus the same constant i32 7.
57 define <16 x float> @test_rndscale_ps_512(<16 x float> %a0) {
59 %res = call <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float> %a0, i32 7) ; <<16 x float>> [#uses=1]
62 declare <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float>, i32) nounwind readnone
; rsqrt14.ps.512: single <16 x float> operand, <16 x float> result.
65 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
67 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
70 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>) nounwind readnone
; rsqrt28.ps.512: same types, rsqrt28 intrinsic.
72 define <16 x float> @test_rsqrt28_ps_512(<16 x float> %a0) {
74 %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
77 declare <16 x float> @llvm.x86.avx512.rsqrt28.ps.512(<16 x float>) nounwind readnone
; The four tests below call the .ss intrinsic variants; each takes one
; <4 x float> operand and produces a <4 x float> result.
; rsqrt14.ss
79 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
81 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
84 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>) nounwind readnone
; rsqrt28.ss
86 define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
88 %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
91 declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>) nounwind readnone
; rcp14.ss
93 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
95 %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
98 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>) nounwind readnone
; rcp28.ss
100 define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
102 %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
105 declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>) nounwind readnone
; sqrt.pd.512: one <8 x double> operand; the call result is returned directly.
107 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
109 %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
110 ret <8 x double> %res
112 declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>) nounwind readnone
; sqrt.ps.512: one <16 x float> operand; the call result is returned directly.
114 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
116 %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
117 ret <16 x float> %res
119 declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>) nounwind readnone
; sqrt.ss: unlike the packed forms above, this intrinsic takes two
; <4 x float> operands (%a0, %a1).
121 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
123 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
126 declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
; sqrt.sd: two <2 x double> operands; the call result is returned directly.
128 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
130 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
131 ret <2 x double> %res
133 declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
; sse2.cvtsd2si64: <2 x double> operand, i64 result. Note this is an SSE2
; intrinsic being compiled under the file's -mcpu=knl setting.
135 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
137 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
140 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
; sse2.cvtsi642sd: takes a <2 x double> and an i64, returns the <2 x double>
; produced by the intrinsic.
142 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
144 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
145 ret <2 x double> %res
147 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
; avx512.cvtusi642sd: same signature as the sse2 cvtsi642sd test; FileCheck
; is told to look for the vcvtusi2sdqz mnemonic in the generated code.
149 define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) {
150 ; CHECK: vcvtusi2sdqz
151 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
152 ret <2 x double> %res
154 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone
; sse2.cvttsd2si64: <2 x double> operand, i64 result.
156 define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
158 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
161 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
; sse.cvtss2si64: <4 x float> operand, i64 result.
164 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
166 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
169 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
; sse.cvtsi642ss: <4 x float> and i64 operands, <4 x float> result.
172 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
174 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
177 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
; sse.cvttss2si64: <4 x float> operand, i64 result.
180 define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
182 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
185 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
; avx512.cvtsd2usi64: <2 x double> operand, i64 result.
187 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
189 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
192 declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
; vcvtph2ps.512: <16 x i16> operand, <16 x float> result, returned directly.
194 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
196 %res = call <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16> %a0)
197 ret <16 x float> %res
199 declare <16 x float> @llvm.x86.avx512.vcvtph2ps.512(<16 x i16>) nounwind readonly
; vcvtps2ph.512: <16 x float> operand plus a constant i32 0, <16 x i16> result.
; NOTE(review): the function is named ..._256 but it calls the .512 intrinsic
; on a 16-element float vector -- confirm whether the name is a leftover and
; whether any FileCheck directive depends on it before renaming.
202 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
204 %res = call <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float> %a0, i32 0)
207 declare <16 x i16> @llvm.x86.avx512.vcvtps2ph.512(<16 x float>, i32) nounwind readonly
; vbroadcast.ss.512, pointer form: the operand is an i8* and the result is
; <16 x float>. FileCheck looks for vbroadcastss.
209 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
210 ; CHECK: vbroadcastss
211 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
212 ret <16 x float> %res
214 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
; vbroadcast.sd.512, pointer form: i8* operand, <8 x double> result.
; FileCheck looks for vbroadcastsd.
216 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
217 ; CHECK: vbroadcastsd
218 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
219 ret <8 x double> %res
221 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
; vbroadcast.ss.ps.512, vector form: the operand is a <4 x float> value rather
; than a pointer. FileCheck looks for the same vbroadcastss mnemonic.
223 define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
224 ; CHECK: vbroadcastss
225 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
226 ret <16 x float> %res
228 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
; vbroadcast.sd.pd.512, vector form: <2 x double> operand, <8 x double> result.
; FileCheck looks for vbroadcastsd.
230 define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
231 ; CHECK: vbroadcastsd
232 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
233 ret <8 x double> %res
235 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
; pbroadcastd.512, vector form: <4 x i32> operand, <16 x i32> result.
; FileCheck looks for vpbroadcastd.
237 define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
238 ; CHECK: vpbroadcastd
239 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
242 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
; pbroadcastd.i32.512, scalar form: the operand is a plain i32.
; FileCheck looks for the same vpbroadcastd mnemonic.
244 define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
245 ; CHECK: vpbroadcastd
246 %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
249 declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
; pbroadcastq.512, vector form: <2 x i64> operand, <8 x i64> result.
; FileCheck looks for vpbroadcastq.
251 define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
252 ; CHECK: vpbroadcastq
253 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
256 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
; pbroadcastq.i64.512, scalar form: the operand is a plain i64.
; FileCheck looks for the same vpbroadcastq mnemonic.
258 define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
259 ; CHECK: vpbroadcastq
260 %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
263 declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
; The four pmax tests below each pass two same-typed vector operands
; (%a0, %a1) to the intrinsic and use a result of that same type.
; pmaxu.d: <16 x i32>
265 define <16 x i32> @test_x86_pmaxu_d(<16 x i32> %a0, <16 x i32> %a1) {
267 %res = call <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
270 declare <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32>, <16 x i32>) nounwind readonly
; pmaxu.q: <8 x i64>
272 define <8 x i64> @test_x86_pmaxu_q(<8 x i64> %a0, <8 x i64> %a1) {
274 %res = call <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
277 declare <8 x i64> @llvm.x86.avx512.pmaxu.q(<8 x i64>, <8 x i64>) nounwind readonly
; pmaxs.d: <16 x i32>
279 define <16 x i32> @test_x86_pmaxs_d(<16 x i32> %a0, <16 x i32> %a1) {
281 %res = call <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
284 declare <16 x i32> @llvm.x86.avx512.pmaxs.d(<16 x i32>, <16 x i32>) nounwind readonly
; pmaxs.q: <8 x i64>
286 define <8 x i64> @test_x86_pmaxs_q(<8 x i64> %a0, <8 x i64> %a1) {
288 %res = call <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
291 declare <8 x i64> @llvm.x86.avx512.pmaxs.q(<8 x i64>, <8 x i64>) nounwind readonly
; The four pmin tests below each pass two same-typed vector operands
; (%a0, %a1) to the intrinsic and use a result of that same type.
; pminu.d: <16 x i32>
293 define <16 x i32> @test_x86_pminu_d(<16 x i32> %a0, <16 x i32> %a1) {
295 %res = call <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
298 declare <16 x i32> @llvm.x86.avx512.pminu.d(<16 x i32>, <16 x i32>) nounwind readonly
; pminu.q: <8 x i64>
300 define <8 x i64> @test_x86_pminu_q(<8 x i64> %a0, <8 x i64> %a1) {
302 %res = call <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
305 declare <8 x i64> @llvm.x86.avx512.pminu.q(<8 x i64>, <8 x i64>) nounwind readonly
; pmins.d: <16 x i32>
307 define <16 x i32> @test_x86_pmins_d(<16 x i32> %a0, <16 x i32> %a1) {
309 %res = call <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]
312 declare <16 x i32> @llvm.x86.avx512.pmins.d(<16 x i32>, <16 x i32>) nounwind readonly
; pmins.q: <8 x i64>
314 define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) {
316 %res = call <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64> %a0, <8 x i64> %a1) ; <<8 x i64>> [#uses=1]
319 declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly
; conflict.d.512, unmasked: single <16 x i32> operand.
321 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
323 %res = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a)
326 declare <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32>) nounwind readonly
; conflict.d.maskz.512: the i16 %mask is bitcast to <16 x i1> and passed as the
; FIRST intrinsic operand, ahead of the data vector. The expected assembly uses
; %k1 together with the {z} modifier.
328 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
329 ; CHECK: vpconflictd %zmm0, %zmm0 {%k1} {z}
330 %vmask = bitcast i16 %mask to <16 x i1>
331 %res = call <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1> %vmask, <16 x i32> %a)
334 declare <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1>,<16 x i32>) nounwind readonly
; conflict.q.mask.512: operand order is (%b, mask, %a) -- note %b comes first
; even though it is the second function parameter. The i8 %mask is bitcast to
; <8 x i1>. The expected assembly uses %k1 without {z}.
336 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
337 ; CHECK: vpconflictq {{.*}} {%k1}
338 %vmask = bitcast i8 %mask to <8 x i1>
339 %res = call <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64> %b, <8 x i1> %vmask, <8 x i64> %a)
342 declare <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64>, <8 x i1>,<8 x i64>) nounwind readonly
; mskblend.ps.512: the i16 %a0 is bitcast to a <16 x i1> mask and passed as the
; first operand, followed by the two <16 x float> inputs.
344 define <16 x float> @test_x86_mskblend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
346 %m0 = bitcast i16 %a0 to <16 x i1>
347 %res = call <16 x float> @llvm.x86.avx512.mskblend.ps.512(<16 x i1> %m0, <16 x float> %a1, <16 x float> %a2) ; <<16 x float>> [#uses=1]
348 ret <16 x float> %res
350 declare <16 x float> @llvm.x86.avx512.mskblend.ps.512(<16 x i1> %a0, <16 x float> %a1, <16 x float> %a2) nounwind readonly
; mskblend.pd.512: the i8 %a0 is bitcast to a <8 x i1> mask and passed as the
; first operand, followed by the two <8 x double> inputs.
352 define <8 x double> @test_x86_mskblend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
354 %m0 = bitcast i8 %a0 to <8 x i1>
355 %res = call <8 x double> @llvm.x86.avx512.mskblend.pd.512(<8 x i1> %m0, <8 x double> %a1, <8 x double> %a2) ; <<8 x double>> [#uses=1]
356 ret <8 x double> %res
; mskblend.pd.512 with a memory operand: the second blend input %b is loaded
; from %ptr before the call. The expected assembly is a vblendmpd that uses
; %k1 and two zmm registers, with the first operand left unconstrained by the
; pattern.
359 define <8 x double> @test_x86_mskblend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
360 ; CHECK-LABEL: test_x86_mskblend_pd_512_memop
361 ; CHECK: vblendmpd {{.*}}, {{%zmm[0-9]}}, {{%zmm[0-9]}} {%k1}
362 %vmask = bitcast i8 %mask to <8 x i1>
363 %b = load <8 x double>* %ptr
364 %res = call <8 x double> @llvm.x86.avx512.mskblend.pd.512(<8 x i1> %vmask, <8 x double> %a, <8 x double> %b) ; <<8 x double>> [#uses=1]
365 ret <8 x double> %res
367 declare <8 x double> @llvm.x86.avx512.mskblend.pd.512(<8 x i1> %a0, <8 x double> %a1, <8 x double> %a2) nounwind readonly
; mskblend.d.512: the i16 %a0 is bitcast to a <16 x i1> mask and passed as the
; first operand, followed by the two <16 x i32> inputs.
369 define <16 x i32> @test_x86_mskblend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
371 %m0 = bitcast i16 %a0 to <16 x i1>
372 %res = call <16 x i32> @llvm.x86.avx512.mskblend.d.512(<16 x i1> %m0, <16 x i32> %a1, <16 x i32> %a2) ; <<16 x i32>> [#uses=1]
375 declare <16 x i32> @llvm.x86.avx512.mskblend.d.512(<16 x i1> %a0, <16 x i32> %a1, <16 x i32> %a2) nounwind readonly
; mskblend.q.512: the i8 %a0 is bitcast to a <8 x i1> mask and passed as the
; first operand, followed by the two <8 x i64> inputs.
377 define <8 x i64> @test_x86_mskblend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
379 %m0 = bitcast i8 %a0 to <8 x i1>
380 %res = call <8 x i64> @llvm.x86.avx512.mskblend.q.512(<8 x i1> %m0, <8 x i64> %a1, <8 x i64> %a2) ; <<8 x i64>> [#uses=1]
383 declare <8 x i64> @llvm.x86.avx512.mskblend.q.512(<8 x i1> %a0, <8 x i64> %a1, <8 x i64> %a2) nounwind readonly