1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; Attribute group #0: referenced by every test function visible in this file
; and marks them nounwind.
4 attributes #0 = { nounwind }
; Register-result truncation of <16 x i32> to <16 x i8>.
; Both llc invocations (shared ALL prefix) are expected to emit a single
; vpmovdb from zmm0 into xmm0.
6 define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
7 ; ALL-LABEL: trunc_16x32_to_16x8:
9 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
11 %x = trunc <16 x i32> %i to <16 x i8>
; Register-result truncation of <8 x i64> to <8 x i16>.
; Both llc invocations (shared ALL prefix) are expected to emit a single
; vpmovqw from zmm0 into xmm0.
15 define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
16 ; ALL-LABEL: trunc_8x64_to_8x16:
18 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
20 %x = trunc <8 x i64> %i to <8 x i16>
; Register-result truncation of <16 x i32> to <16 x i16>.
; Both llc invocations (shared ALL prefix) are expected to emit a single
; vpmovdw from zmm0 into ymm0.
24 define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
25 ; ALL-LABEL: trunc_v16i32_to_v16i16:
27 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
29 %1 = trunc <16 x i32> %x to <16 x i16>
; Register-result truncation of <8 x i64> to <8 x i8>.
; Both llc invocations (shared ALL prefix) are expected to emit vpmovqw
; (quadword-to-word), not vpmovqb, from zmm0 into xmm0.
; NOTE(review): presumably the <8 x i8> return value is widened to 16-bit
; lanes during type legalization, which would explain the word form - confirm.
33 define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
34 ; ALL-LABEL: trunc_qb_512:
36 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
38 %x = trunc <8 x i64> %i to <8 x i8>
; Truncates <8 x i64> to <8 x i8> and stores the result through %res.
; Both llc invocations (shared ALL prefix) are expected to fold the store into
; the truncation: a single vpmovqb from zmm0 to memory at (%rdi).
42 define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
43 ; ALL-LABEL: trunc_qb_512_mem:
45 ; ALL-NEXT: vpmovqb %zmm0, (%rdi)
47 %x = trunc <8 x i64> %i to <8 x i8>
48 store <8 x i8> %x, <8 x i8>* %res
; Register-result truncation of <4 x i64> to <4 x i8>.
; KNL run (avx512f only) expects vpmovqd on the 512-bit register (zmm0 into
; ymm0); SKX run (avx512vl/bw/dq added) expects the 256-bit form (ymm0 into xmm0).
; NOTE(review): both expect quadword-to-dword narrowing rather than a byte
; result, presumably because the <4 x i8> return value is widened - confirm.
52 define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
53 ; KNL-LABEL: trunc_qb_256:
55 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
58 ; SKX-LABEL: trunc_qb_256:
60 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
62 %x = trunc <4 x i64> %i to <4 x i8>
; Truncates <4 x i64> to <4 x i8> and stores the result through %res.
; KNL run (avx512f only) expects a three-step sequence: vpmovqd on zmm0, a
; vpshufb that gathers bytes 0,4,8,12 into the low lanes, then a vmovd store
; to (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovqb from ymm0 to memory.
66 define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
67 ; KNL-LABEL: trunc_qb_256_mem:
69 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
70 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
71 ; KNL-NEXT: vmovd %xmm0, (%rdi)
74 ; SKX-LABEL: trunc_qb_256_mem:
76 ; SKX-NEXT: vpmovqb %ymm0, (%rdi)
78 %x = trunc <4 x i64> %i to <4 x i8>
79 store <4 x i8> %x, <4 x i8>* %res
; Register-result truncation of <2 x i64> to <2 x i8>.
; Only the shared label check is visible in this view; no instruction-level
; expectations are shown for this function here.
83 define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
84 ; ALL-LABEL: trunc_qb_128:
87 %x = trunc <2 x i64> %i to <2 x i8>
; Truncates <2 x i64> to <2 x i8> and stores the result through %res.
; KNL run (avx512f only) expects a vpshufb that gathers bytes 0 and 8, a vmovd
; into eax, and a 16-bit movw store of ax to (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovqb from xmm0 to memory.
91 define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
92 ; KNL-LABEL: trunc_qb_128_mem:
94 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
95 ; KNL-NEXT: vmovd %xmm0, %eax
96 ; KNL-NEXT: movw %ax, (%rdi)
99 ; SKX-LABEL: trunc_qb_128_mem:
101 ; SKX-NEXT: vpmovqb %xmm0, (%rdi)
103 %x = trunc <2 x i64> %i to <2 x i8>
104 store <2 x i8> %x, <2 x i8>* %res
; Register-result truncation of <8 x i64> to <8 x i16>.
; Both llc invocations (shared ALL prefix) are expected to emit a single
; vpmovqw from zmm0 into xmm0.
108 define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
109 ; ALL-LABEL: trunc_qw_512:
111 ; ALL-NEXT: vpmovqw %zmm0, %xmm0
113 %x = trunc <8 x i64> %i to <8 x i16>
; Truncates <8 x i64> to <8 x i16> and stores the result through %res.
; Both llc invocations (shared ALL prefix) are expected to emit a single
; vpmovqw from zmm0 directly to memory at (%rdi).
117 define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
118 ; ALL-LABEL: trunc_qw_512_mem:
120 ; ALL-NEXT: vpmovqw %zmm0, (%rdi)
122 %x = trunc <8 x i64> %i to <8 x i16>
123 store <8 x i16> %x, <8 x i16>* %res
; Register-result truncation of <4 x i64> to <4 x i16>.
; KNL run (avx512f only) expects vpmovqd on the 512-bit register (zmm0 into
; ymm0); SKX run (avx512vl/bw/dq added) expects the 256-bit form (ymm0 into xmm0).
; NOTE(review): both expect quadword-to-dword narrowing rather than a word
; result, presumably because the <4 x i16> return value is widened - confirm.
127 define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
128 ; KNL-LABEL: trunc_qw_256:
130 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
133 ; SKX-LABEL: trunc_qw_256:
135 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
137 %x = trunc <4 x i64> %i to <4 x i16>
; Truncates <4 x i64> to <4 x i16> and stores the result through %res.
; KNL run (avx512f only) expects vpmovqd on zmm0, a vpshufb that packs the low
; word of each dword (bytes 0,1,4,5,8,9,12,13) into the low half, then a vmovq
; store to (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovqw from ymm0 to memory.
141 define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
142 ; KNL-LABEL: trunc_qw_256_mem:
144 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
145 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
146 ; KNL-NEXT: vmovq %xmm0, (%rdi)
149 ; SKX-LABEL: trunc_qw_256_mem:
151 ; SKX-NEXT: vpmovqw %ymm0, (%rdi)
153 %x = trunc <4 x i64> %i to <4 x i16>
154 store <4 x i16> %x, <4 x i16>* %res
; Register-result truncation of <2 x i64> to <2 x i16>.
; Only the shared label check is visible in this view; no instruction-level
; expectations are shown for this function here.
158 define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
159 ; ALL-LABEL: trunc_qw_128:
162 %x = trunc <2 x i64> %i to <2 x i16>
; Truncates <2 x i64> to <2 x i16> and stores the result through %res.
; KNL run (avx512f only) expects two shuffles - vpshufd with dword pattern
; [0,2,2,3] followed by vpshuflw with word pattern [0,2,2,3,...] - and then a
; vmovd store to (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovqw from xmm0 to memory.
166 define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
167 ; KNL-LABEL: trunc_qw_128_mem:
169 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
170 ; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
171 ; KNL-NEXT: vmovd %xmm0, (%rdi)
174 ; SKX-LABEL: trunc_qw_128_mem:
176 ; SKX-NEXT: vpmovqw %xmm0, (%rdi)
178 %x = trunc <2 x i64> %i to <2 x i16>
179 store <2 x i16> %x, <2 x i16>* %res
; Register-result truncation of <8 x i64> to <8 x i32>.
; Both llc invocations (shared ALL prefix) are expected to emit a single
; vpmovqd from zmm0 into ymm0.
183 define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
184 ; ALL-LABEL: trunc_qd_512:
186 ; ALL-NEXT: vpmovqd %zmm0, %ymm0
188 %x = trunc <8 x i64> %i to <8 x i32>
; Truncates <8 x i64> to <8 x i32> and stores the result through %res.
; Both llc invocations (shared ALL prefix) are expected to emit a single
; vpmovqd from zmm0 directly to memory at (%rdi).
192 define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
193 ; ALL-LABEL: trunc_qd_512_mem:
195 ; ALL-NEXT: vpmovqd %zmm0, (%rdi)
197 %x = trunc <8 x i64> %i to <8 x i32>
198 store <8 x i32> %x, <8 x i32>* %res
; Register-result truncation of <4 x i64> to <4 x i32>.
; KNL run (avx512f only) expects vpmovqd on the 512-bit register (zmm0 into
; ymm0); SKX run (avx512vl/bw/dq added) expects the 256-bit form (ymm0 into xmm0).
202 define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
203 ; KNL-LABEL: trunc_qd_256:
205 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
208 ; SKX-LABEL: trunc_qd_256:
210 ; SKX-NEXT: vpmovqd %ymm0, %xmm0
212 %x = trunc <4 x i64> %i to <4 x i32>
; Truncates <4 x i64> to <4 x i32> and stores the result through %res.
; KNL run (avx512f only) expects vpmovqd on zmm0 followed by a separate
; vmovaps store of xmm0 to (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovqd from ymm0 to memory.
216 define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
217 ; KNL-LABEL: trunc_qd_256_mem:
219 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
220 ; KNL-NEXT: vmovaps %xmm0, (%rdi)
223 ; SKX-LABEL: trunc_qd_256_mem:
225 ; SKX-NEXT: vpmovqd %ymm0, (%rdi)
227 %x = trunc <4 x i64> %i to <4 x i32>
228 store <4 x i32> %x, <4 x i32>* %res
; Register-result truncation of <2 x i64> to <2 x i32>.
; Only the shared label check is visible in this view; no instruction-level
; expectations are shown for this function here.
232 define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
233 ; ALL-LABEL: trunc_qd_128:
236 %x = trunc <2 x i64> %i to <2 x i32>
; Truncates <2 x i64> to <2 x i32> and stores the result through %res.
; KNL run (avx512f only) expects a vpshufd with dword pattern [0,2,2,3]
; followed by a vmovq store to (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovqd from xmm0 to memory.
240 define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
241 ; KNL-LABEL: trunc_qd_128_mem:
243 ; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
244 ; KNL-NEXT: vmovq %xmm0, (%rdi)
247 ; SKX-LABEL: trunc_qd_128_mem:
249 ; SKX-NEXT: vpmovqd %xmm0, (%rdi)
251 %x = trunc <2 x i64> %i to <2 x i32>
252 store <2 x i32> %x, <2 x i32>* %res
; Register-result truncation of <16 x i32> to <16 x i8>.
; Both llc invocations (shared ALL prefix) are expected to emit a single
; vpmovdb from zmm0 into xmm0.
256 define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
257 ; ALL-LABEL: trunc_db_512:
259 ; ALL-NEXT: vpmovdb %zmm0, %xmm0
261 %x = trunc <16 x i32> %i to <16 x i8>
; Truncates <16 x i32> to <16 x i8> and stores the result through %res.
; Both llc invocations (shared ALL prefix) are expected to emit a single
; vpmovdb from zmm0 directly to memory at (%rdi).
265 define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
266 ; ALL-LABEL: trunc_db_512_mem:
268 ; ALL-NEXT: vpmovdb %zmm0, (%rdi)
270 %x = trunc <16 x i32> %i to <16 x i8>
271 store <16 x i8> %x, <16 x i8>* %res
; Register-result truncation of <8 x i32> to <8 x i8>.
; KNL run (avx512f only) expects vpmovdw on the 512-bit register (zmm0 into
; ymm0); SKX run (avx512vl/bw/dq added) expects the 256-bit form (ymm0 into xmm0).
; NOTE(review): both expect dword-to-word narrowing rather than a byte result,
; presumably because the <8 x i8> return value is widened - confirm.
275 define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
276 ; KNL-LABEL: trunc_db_256:
278 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
281 ; SKX-LABEL: trunc_db_256:
283 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
285 %x = trunc <8 x i32> %i to <8 x i8>
; Truncates <8 x i32> to <8 x i8> and stores the result through %res.
; KNL run (avx512f only) expects vpmovdw on zmm0, a vpshufb that gathers the
; even-numbered bytes (0,2,...,14) into the low half, then a vmovq store to
; (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovdb from ymm0 to memory.
289 define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
290 ; KNL-LABEL: trunc_db_256_mem:
292 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
293 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
294 ; KNL-NEXT: vmovq %xmm0, (%rdi)
297 ; SKX-LABEL: trunc_db_256_mem:
299 ; SKX-NEXT: vpmovdb %ymm0, (%rdi)
301 %x = trunc <8 x i32> %i to <8 x i8>
302 store <8 x i8> %x, <8 x i8>* %res
; Register-result truncation of <4 x i32> to <4 x i8>.
; Only the shared label check is visible in this view; no instruction-level
; expectations are shown for this function here.
306 define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
307 ; ALL-LABEL: trunc_db_128:
310 %x = trunc <4 x i32> %i to <4 x i8>
; Truncates <4 x i32> to <4 x i8> and stores the result through %res.
; KNL run (avx512f only) expects a vpshufb that gathers bytes 0,4,8,12 into the
; low lanes followed by a vmovd store to (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovdb from xmm0 to memory.
314 define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
315 ; KNL-LABEL: trunc_db_128_mem:
317 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
318 ; KNL-NEXT: vmovd %xmm0, (%rdi)
321 ; SKX-LABEL: trunc_db_128_mem:
323 ; SKX-NEXT: vpmovdb %xmm0, (%rdi)
325 %x = trunc <4 x i32> %i to <4 x i8>
326 store <4 x i8> %x, <4 x i8>* %res
; Register-result truncation of <16 x i32> to <16 x i16>.
; Both llc invocations (shared ALL prefix) are expected to emit a single
; vpmovdw from zmm0 into ymm0.
330 define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
331 ; ALL-LABEL: trunc_dw_512:
333 ; ALL-NEXT: vpmovdw %zmm0, %ymm0
335 %x = trunc <16 x i32> %i to <16 x i16>
; Truncates <16 x i32> to <16 x i16> and stores the result through %res.
; Both llc invocations (shared ALL prefix) are expected to emit a single
; vpmovdw from zmm0 directly to memory at (%rdi).
339 define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
340 ; ALL-LABEL: trunc_dw_512_mem:
342 ; ALL-NEXT: vpmovdw %zmm0, (%rdi)
344 %x = trunc <16 x i32> %i to <16 x i16>
345 store <16 x i16> %x, <16 x i16>* %res
; Register-result truncation of <8 x i32> to <8 x i16>.
; KNL run (avx512f only) expects vpmovdw on the 512-bit register (zmm0 into
; ymm0); SKX run (avx512vl/bw/dq added) expects the 256-bit form (ymm0 into xmm0).
349 define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
350 ; KNL-LABEL: trunc_dw_256:
352 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
355 ; SKX-LABEL: trunc_dw_256:
357 ; SKX-NEXT: vpmovdw %ymm0, %xmm0
359 %x = trunc <8 x i32> %i to <8 x i16>
; Truncates <8 x i32> to <8 x i16> and stores the result through %res.
; KNL run (avx512f only) expects vpmovdw on zmm0 followed by a separate
; vmovaps store of xmm0 to (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovdw from ymm0 to memory.
363 define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
364 ; KNL-LABEL: trunc_dw_256_mem:
366 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
367 ; KNL-NEXT: vmovaps %xmm0, (%rdi)
370 ; SKX-LABEL: trunc_dw_256_mem:
372 ; SKX-NEXT: vpmovdw %ymm0, (%rdi)
374 %x = trunc <8 x i32> %i to <8 x i16>
375 store <8 x i16> %x, <8 x i16>* %res
; Truncates <4 x i32> to <4 x i16> and stores the result through %res.
; KNL run (avx512f only) expects a vpshufb that packs the low word of each
; dword (bytes 0,1,4,5,8,9,12,13) into the low half, then a vmovq store to
; (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovdw from xmm0 to memory.
379 define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
380 ; KNL-LABEL: trunc_dw_128_mem:
382 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
383 ; KNL-NEXT: vmovq %xmm0, (%rdi)
386 ; SKX-LABEL: trunc_dw_128_mem:
388 ; SKX-NEXT: vpmovdw %xmm0, (%rdi)
390 %x = trunc <4 x i32> %i to <4 x i16>
391 store <4 x i16> %x, <4 x i16>* %res
; Register-result truncation of <32 x i16> to <32 x i8>.
; KNL run (avx512f only) expects the input to be handled as two halves in ymm0
; and ymm1: each is sign-extended to dwords (vpmovsxwd) and narrowed to bytes
; (vpmovdb), then the two xmm results are joined with vinsertf128.
; SKX run (avx512vl/bw/dq added) expects a single vpmovwb from zmm0 into ymm0.
395 define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
396 ; KNL-LABEL: trunc_wb_512:
398 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
399 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
400 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
401 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
402 ; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
405 ; SKX-LABEL: trunc_wb_512:
407 ; SKX-NEXT: vpmovwb %zmm0, %ymm0
409 %x = trunc <32 x i16> %i to <32 x i8>
; Truncates <32 x i16> to <32 x i8> and stores the result through %res.
; KNL run (avx512f only) expects the two-half sequence (vpmovsxwd + vpmovdb on
; ymm0 and on ymm1, joined by vinsertf128) followed by a vmovaps store of ymm0
; to (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovwb from zmm0 to memory.
413 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
414 ; KNL-LABEL: trunc_wb_512_mem:
416 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
417 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
418 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
419 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
420 ; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
421 ; KNL-NEXT: vmovaps %ymm0, (%rdi)
424 ; SKX-LABEL: trunc_wb_512_mem:
426 ; SKX-NEXT: vpmovwb %zmm0, (%rdi)
428 %x = trunc <32 x i16> %i to <32 x i8>
429 store <32 x i8> %x, <32 x i8>* %res
; Register-result truncation of <16 x i16> to <16 x i8>.
; KNL run (avx512f only) expects a sign-extension of ymm0 to dwords in zmm0
; (vpmovsxwd) followed by vpmovdb into xmm0.
; SKX run (avx512vl/bw/dq added) expects a single vpmovwb from ymm0 into xmm0.
433 define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
434 ; KNL-LABEL: trunc_wb_256:
436 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
437 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
440 ; SKX-LABEL: trunc_wb_256:
442 ; SKX-NEXT: vpmovwb %ymm0, %xmm0
444 %x = trunc <16 x i16> %i to <16 x i8>
; Truncates <16 x i16> to <16 x i8> and stores the result through %res.
; KNL run (avx512f only) expects vpmovsxwd (ymm0 to zmm0), vpmovdb into xmm0,
; and a separate vmovaps store of xmm0 to (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovwb from ymm0 to memory.
448 define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
449 ; KNL-LABEL: trunc_wb_256_mem:
451 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
452 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
453 ; KNL-NEXT: vmovaps %xmm0, (%rdi)
456 ; SKX-LABEL: trunc_wb_256_mem:
458 ; SKX-NEXT: vpmovwb %ymm0, (%rdi)
460 %x = trunc <16 x i16> %i to <16 x i8>
461 store <16 x i8> %x, <16 x i8>* %res
; Register-result truncation of <8 x i16> to <8 x i8>.
; Only the shared label check is visible in this view; no instruction-level
; expectations are shown for this function here.
465 define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
466 ; ALL-LABEL: trunc_wb_128:
469 %x = trunc <8 x i16> %i to <8 x i8>
; Truncates <8 x i16> to <8 x i8> and stores the result through %res.
; KNL run (avx512f only) expects a vpshufb that gathers the even-numbered bytes
; (0,2,...,14) into the low half followed by a vmovq store to (%rdi).
; SKX run (avx512vl/bw/dq added) expects a single vpmovwb from xmm0 to memory.
473 define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
474 ; KNL-LABEL: trunc_wb_128_mem:
476 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
477 ; KNL-NEXT: vmovq %xmm0, (%rdi)
480 ; SKX-LABEL: trunc_wb_128_mem:
482 ; SKX-NEXT: vpmovwb %xmm0, (%rdi)
484 %x = trunc <8 x i16> %i to <8 x i8>
485 store <8 x i8> %x, <8 x i8>* %res