1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
4 ;SKX-LABEL: zext_8x8mem_to_8x16:
6 ;SKX-NEXT: vpmovw2m %xmm0, %k1
7 ;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z}
9 define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
10 %a = load <8 x i8>,<8 x i8> *%i,align 1
11 %x = zext <8 x i8> %a to <8 x i16>
12 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
16 ;SKX-LABEL: sext_8x8mem_to_8x16:
18 ;SKX-NEXT: vpmovw2m %xmm0, %k1
19 ;SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
21 define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
22 %a = load <8 x i8>,<8 x i8> *%i,align 1
23 %x = sext <8 x i8> %a to <8 x i16>
24 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
28 ;SKX-LABEL: zext_16x8mem_to_16x16:
30 ;SKX-NEXT: vpmovb2m %xmm0, %k1
31 ;SKX-NEXT: vpmovzxbw (%rdi), %ymm0 {%k1} {z}
33 define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
34 %a = load <16 x i8>,<16 x i8> *%i,align 1
35 %x = zext <16 x i8> %a to <16 x i16>
36 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
40 ;SKX-LABEL: sext_16x8mem_to_16x16:
42 ;SKX-NEXT: vpmovb2m %xmm0, %k1
43 ;SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
45 define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
46 %a = load <16 x i8>,<16 x i8> *%i,align 1
47 %x = sext <16 x i8> %a to <16 x i16>
48 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
52 ;SKX-LABEL: zext_16x8_to_16x16:
54 ;SKX-NEXT: vpmovzxbw %xmm0, %ymm0
56 define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
57 %x = zext <16 x i8> %a to <16 x i16>
61 ;SKX-LABEL: zext_16x8_to_16x16_mask:
63 ;SKX-NEXT: vpmovb2m %xmm1, %k1
64 ;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z}
66 define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
67 %x = zext <16 x i8> %a to <16 x i16>
68 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
72 ;SKX-LABEL: sext_16x8_to_16x16:
74 ;SKX-NEXT: vpmovsxbw %xmm0, %ymm0
76 define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
77 %x = sext <16 x i8> %a to <16 x i16>
81 ;SKX-LABEL: sext_16x8_to_16x16_mask:
83 ;SKX-NEXT: vpmovb2m %xmm1, %k1
84 ;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
86 define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
87 %x = sext <16 x i8> %a to <16 x i16>
88 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
92 ;SKX-LABEL: zext_32x8mem_to_32x16:
94 ;SKX-NEXT: vpmovb2m %ymm0, %k1
95 ;SKX-NEXT: vpmovzxbw (%rdi), %zmm0 {%k1} {z}
97 define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
98 %a = load <32 x i8>,<32 x i8> *%i,align 1
99 %x = zext <32 x i8> %a to <32 x i16>
100 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
104 ;SKX-LABEL: sext_32x8mem_to_32x16:
106 ;SKX-NEXT: vpmovb2m %ymm0, %k1
107 ;SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
109 define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
110 %a = load <32 x i8>,<32 x i8> *%i,align 1
111 %x = sext <32 x i8> %a to <32 x i16>
112 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
116 ;SKX-LABEL: zext_32x8_to_32x16:
118 ;SKX-NEXT: vpmovzxbw %ymm0, %zmm0
120 define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
121 %x = zext <32 x i8> %a to <32 x i16>
125 ;SKX-LABEL: zext_32x8_to_32x16_mask:
127 ;SKX-NEXT: vpmovb2m %ymm1, %k1
128 ;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z}
130 define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
131 %x = zext <32 x i8> %a to <32 x i16>
132 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
136 ;SKX-LABEL: sext_32x8_to_32x16:
138 ;SKX-NEXT: vpmovsxbw %ymm0, %zmm0
140 define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
141 %x = sext <32 x i8> %a to <32 x i16>
145 ;SKX-LABEL: sext_32x8_to_32x16_mask:
147 ;SKX-NEXT: vpmovb2m %ymm1, %k1
148 ;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
150 define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
151 %x = sext <32 x i8> %a to <32 x i16>
152 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
156 ;SKX-LABEL: zext_4x8mem_to_4x32:
158 ;SKX-NEXT: vpmovd2m %xmm0, %k1
159 ;SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z}
161 define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
162 %a = load <4 x i8>,<4 x i8> *%i,align 1
163 %x = zext <4 x i8> %a to <4 x i32>
164 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
168 ;SKX-LABEL: sext_4x8mem_to_4x32:
170 ;SKX-NEXT: vpmovd2m %xmm0, %k1
171 ;SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
173 define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
174 %a = load <4 x i8>,<4 x i8> *%i,align 1
175 %x = sext <4 x i8> %a to <4 x i32>
176 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
180 ;SKX-LABEL: zext_8x8mem_to_8x32:
182 ;SKX-NEXT: vpmovw2m %xmm0, %k1
183 ;SKX-NEXT: vpmovzxbd (%rdi), %ymm0 {%k1} {z}
185 define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
186 %a = load <8 x i8>,<8 x i8> *%i,align 1
187 %x = zext <8 x i8> %a to <8 x i32>
188 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
192 ;SKX-LABEL: sext_8x8mem_to_8x32:
194 ;SKX-NEXT: vpmovw2m %xmm0, %k1
195 ;SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
197 define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
198 %a = load <8 x i8>,<8 x i8> *%i,align 1
199 %x = sext <8 x i8> %a to <8 x i32>
200 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
204 ;KNL-LABEL: zext_16x8mem_to_16x32:
205 ;KNL: vpmovzxbd (%rdi), %zmm0 {%k1} {z}
207 define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
208 %a = load <16 x i8>,<16 x i8> *%i,align 1
209 %x = zext <16 x i8> %a to <16 x i32>
210 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
214 ;KNL-LABEL: sext_16x8mem_to_16x32:
215 ;KNL: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
217 define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
218 %a = load <16 x i8>,<16 x i8> *%i,align 1
219 %x = sext <16 x i8> %a to <16 x i32>
220 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
224 ;KNL-LABEL: zext_16x8_to_16x32_mask:
225 ;KNL: vpmovzxbd %xmm0, %zmm0 {%k1} {z}
227 define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
228 %x = zext <16 x i8> %a to <16 x i32>
229 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
233 ;KNL-LABEL: sext_16x8_to_16x32_mask:
234 ;KNL: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
236 define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
237 %x = sext <16 x i8> %a to <16 x i32>
238 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
242 ; KNL-LABEL: zext_16x8_to_16x32:
243 ; KNL: vpmovzxbd {{.*}}%zmm
245 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
246 %x = zext <16 x i8> %i to <16 x i32>
250 ; KNL-LABEL: sext_16x8_to_16x32:
251 ; KNL: vpmovsxbd {{.*}}%zmm
253 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
254 %x = sext <16 x i8> %i to <16 x i32>
258 ;SKX-LABEL: zext_2x8mem_to_2x64:
260 ;SKX-NEXT: vpmovq2m %xmm0, %k1
261 ;SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z}
263 define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
264 %a = load <2 x i8>,<2 x i8> *%i,align 1
265 %x = zext <2 x i8> %a to <2 x i64>
266 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
269 ;SKX-LABEL: sext_2x8mem_to_2x64mask:
271 ;SKX-NEXT: vpmovq2m %xmm0, %k1
272 ;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
274 define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
275 %a = load <2 x i8>,<2 x i8> *%i,align 1
276 %x = sext <2 x i8> %a to <2 x i64>
277 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
280 ;SKX-LABEL: sext_2x8mem_to_2x64:
282 ;SKX-NEXT: vpmovsxbq (%rdi), %xmm0
284 define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
285 %a = load <2 x i8>,<2 x i8> *%i,align 1
286 %x = sext <2 x i8> %a to <2 x i64>
290 ;SKX-LABEL: zext_4x8mem_to_4x64:
292 ;SKX-NEXT: vpmovd2m %xmm0, %k1
293 ;SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z}
295 define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
296 %a = load <4 x i8>,<4 x i8> *%i,align 1
297 %x = zext <4 x i8> %a to <4 x i64>
298 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
302 ;SKX-LABEL: sext_4x8mem_to_4x64mask:
304 ;SKX-NEXT: vpmovd2m %xmm0, %k1
305 ;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
307 define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
308 %a = load <4 x i8>,<4 x i8> *%i,align 1
309 %x = sext <4 x i8> %a to <4 x i64>
310 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
314 ;SKX-LABEL: sext_4x8mem_to_4x64:
316 ;SKX-NEXT: vpmovsxbq (%rdi), %ymm0
318 define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
319 %a = load <4 x i8>,<4 x i8> *%i,align 1
320 %x = sext <4 x i8> %a to <4 x i64>
324 ;KNL-LABEL: zext_8x8mem_to_8x64:
325 ;KNL: vpmovzxbq (%rdi), %zmm0 {%k1} {z}
327 define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
328 %a = load <8 x i8>,<8 x i8> *%i,align 1
329 %x = zext <8 x i8> %a to <8 x i64>
330 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
334 ;KNL-LABEL: sext_8x8mem_to_8x64mask:
335 ;KNL: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
337 define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
338 %a = load <8 x i8>,<8 x i8> *%i,align 1
339 %x = sext <8 x i8> %a to <8 x i64>
340 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
344 ;KNL-LABEL: sext_8x8mem_to_8x64:
345 ;KNL: vpmovsxbq (%rdi), %zmm0
347 define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
348 %a = load <8 x i8>,<8 x i8> *%i,align 1
349 %x = sext <8 x i8> %a to <8 x i64>
353 ;SKX-LABEL: zext_4x16mem_to_4x32:
355 ;SKX-NEXT: vpmovd2m %xmm0, %k1
356 ;SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z}
358 define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
359 %a = load <4 x i16>,<4 x i16> *%i,align 1
360 %x = zext <4 x i16> %a to <4 x i32>
361 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
365 ;SKX-LABEL: sext_4x16mem_to_4x32mask:
367 ;SKX-NEXT: vpmovd2m %xmm0, %k1
368 ;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
370 define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
371 %a = load <4 x i16>,<4 x i16> *%i,align 1
372 %x = sext <4 x i16> %a to <4 x i32>
373 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
377 ;SKX-LABEL: sext_4x16mem_to_4x32:
379 ;SKX-NEXT: vpmovsxwd (%rdi), %xmm0
381 define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
382 %a = load <4 x i16>,<4 x i16> *%i,align 1
383 %x = sext <4 x i16> %a to <4 x i32>
388 ;SKX-LABEL: zext_8x16mem_to_8x32:
390 ;SKX-NEXT: vpmovw2m %xmm0, %k1
391 ;SKX-NEXT: vpmovzxwd (%rdi), %ymm0 {%k1} {z}
393 define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
394 %a = load <8 x i16>,<8 x i16> *%i,align 1
395 %x = zext <8 x i16> %a to <8 x i32>
396 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
400 ;SKX-LABEL: sext_8x16mem_to_8x32mask:
402 ;SKX-NEXT: vpmovw2m %xmm0, %k1
403 ;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
405 define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
406 %a = load <8 x i16>,<8 x i16> *%i,align 1
407 %x = sext <8 x i16> %a to <8 x i32>
408 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
412 ;SKX-LABEL: sext_8x16mem_to_8x32:
414 ;SKX-NEXT: vpmovsxwd (%rdi), %ymm0
416 define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
417 %a = load <8 x i16>,<8 x i16> *%i,align 1
418 %x = sext <8 x i16> %a to <8 x i32>
422 ;SKX-LABEL: zext_8x16_to_8x32mask:
424 ;SKX-NEXT: vpmovw2m %xmm1, %k1
425 ;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z}
427 define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
428 %x = zext <8 x i16> %a to <8 x i32>
429 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
433 ;SKX-LABEL: zext_8x16_to_8x32:
435 ;SKX-NEXT: vpmovzxwd %xmm0, %ymm0
437 define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
438 %x = zext <8 x i16> %a to <8 x i32>
442 ;SKX-LABEL: zext_16x16mem_to_16x32:
443 ;KNL-LABEL: zext_16x16mem_to_16x32:
445 ;SKX-NEXT: vpmovb2m %xmm0, %k1
446 ;SKX-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
447 ;KNL: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
449 define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
450 %a = load <16 x i16>,<16 x i16> *%i,align 1
451 %x = zext <16 x i16> %a to <16 x i32>
452 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
456 ;SKX-LABEL: sext_16x16mem_to_16x32mask:
457 ;KNL-LABEL: sext_16x16mem_to_16x32mask:
459 ;SKX-NEXT: vpmovb2m %xmm0, %k1
460 ;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
461 ;KNL: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
463 define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
464 %a = load <16 x i16>,<16 x i16> *%i,align 1
465 %x = sext <16 x i16> %a to <16 x i32>
466 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
470 ;SKX-LABEL: sext_16x16mem_to_16x32:
471 ;KNL-LABEL: sext_16x16mem_to_16x32:
473 ;SKX-NEXT: vpmovsxwd (%rdi), %zmm0
474 ;KNL: vpmovsxwd (%rdi), %zmm0
476 define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
477 %a = load <16 x i16>,<16 x i16> *%i,align 1
478 %x = sext <16 x i16> %a to <16 x i32>
481 ;SKX-LABEL: zext_16x16_to_16x32mask:
482 ;KNL-LABEL: zext_16x16_to_16x32mask:
484 ;SKX-NEXT: vpmovb2m %xmm1, %k1
485 ;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
486 ;KNL: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
488 define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
489 %x = zext <16 x i16> %a to <16 x i32>
490 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
494 ;SKX-LABEL: zext_16x16_to_16x32:
495 ;KNL-LABEL: zext_16x16_to_16x32:
497 ;SKX-NEXT: vpmovzxwd %ymm0, %zmm0
498 ;KNL: vpmovzxwd %ymm0, %zmm0
500 define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
501 %x = zext <16 x i16> %a to <16 x i32>
505 ;SKX-LABEL: zext_2x16mem_to_2x64:
507 ;SKX-NEXT: vpmovq2m %xmm0, %k1
508 ;SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z}
510 define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
511 %a = load <2 x i16>,<2 x i16> *%i,align 1
512 %x = zext <2 x i16> %a to <2 x i64>
513 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
517 ;SKX-LABEL: sext_2x16mem_to_2x64mask:
519 ;SKX-NEXT: vpmovq2m %xmm0, %k1
520 ;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
522 define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
523 %a = load <2 x i16>,<2 x i16> *%i,align 1
524 %x = sext <2 x i16> %a to <2 x i64>
525 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
529 ;SKX-LABEL: sext_2x16mem_to_2x64:
531 ;SKX-NEXT: vpmovsxwq (%rdi), %xmm0
533 define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
534 %a = load <2 x i16>,<2 x i16> *%i,align 1
535 %x = sext <2 x i16> %a to <2 x i64>
539 ;SKX-LABEL: zext_4x16mem_to_4x64:
541 ;SKX-NEXT: vpmovd2m %xmm0, %k1
542 ;SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z}
544 define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
545 %a = load <4 x i16>,<4 x i16> *%i,align 1
546 %x = zext <4 x i16> %a to <4 x i64>
547 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
551 ;SKX-LABEL: sext_4x16mem_to_4x64mask:
553 ;SKX-NEXT: vpmovd2m %xmm0, %k1
554 ;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
556 define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
557 %a = load <4 x i16>,<4 x i16> *%i,align 1
558 %x = sext <4 x i16> %a to <4 x i64>
559 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
563 ;SKX-LABEL: sext_4x16mem_to_4x64:
565 ;SKX-NEXT: vpmovsxwq (%rdi), %ymm0
567 define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
568 %a = load <4 x i16>,<4 x i16> *%i,align 1
569 %x = sext <4 x i16> %a to <4 x i64>
573 ;SKX-LABEL: zext_8x16mem_to_8x64:
574 ;KNL-LABEL: zext_8x16mem_to_8x64:
576 ;SKX-NEXT: vpmovw2m %xmm0, %k1
577 ;SKX-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
578 ;KNL: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
580 define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
581 %a = load <8 x i16>,<8 x i16> *%i,align 1
582 %x = zext <8 x i16> %a to <8 x i64>
583 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
587 ;SKX-LABEL: sext_8x16mem_to_8x64mask:
588 ;KNL-LABEL: sext_8x16mem_to_8x64mask:
590 ;SKX-NEXT: vpmovw2m %xmm0, %k1
591 ;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
592 ;KNL: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
594 define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
595 %a = load <8 x i16>,<8 x i16> *%i,align 1
596 %x = sext <8 x i16> %a to <8 x i64>
597 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
601 ;SKX-LABEL: sext_8x16mem_to_8x64:
602 ;KNL-LABEL: sext_8x16mem_to_8x64:
604 ;SKX-NEXT: vpmovsxwq (%rdi), %zmm0
605 ;KNL: vpmovsxwq (%rdi), %zmm0
607 define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
608 %a = load <8 x i16>,<8 x i16> *%i,align 1
609 %x = sext <8 x i16> %a to <8 x i64>
613 ;SKX-LABEL: zext_8x16_to_8x64mask:
614 ;KNL-LABEL: zext_8x16_to_8x64mask:
616 ;SKX-NEXT: vpmovw2m %xmm1, %k1
617 ;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
618 ;KNL: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
620 define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
621 %x = zext <8 x i16> %a to <8 x i64>
622 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
626 ;SKX-LABEL: zext_8x16_to_8x64:
627 ;KNL-LABEL: zext_8x16_to_8x64:
629 ;SKX-NEXT: vpmovzxwq %xmm0, %zmm0
630 ;KNL: vpmovzxwq %xmm0, %zmm0
633 define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
634 %ret = zext <8 x i16> %a to <8 x i64>
638 ;SKX-LABEL: zext_2x32mem_to_2x64:
640 ;SKX-NEXT: vpmovq2m %xmm0, %k1
641 ;SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z}
643 define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
644 %a = load <2 x i32>,<2 x i32> *%i,align 1
645 %x = zext <2 x i32> %a to <2 x i64>
646 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
650 ;SKX-LABEL: sext_2x32mem_to_2x64mask:
652 ;SKX-NEXT: vpmovq2m %xmm0, %k1
653 ;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
655 define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
656 %a = load <2 x i32>,<2 x i32> *%i,align 1
657 %x = sext <2 x i32> %a to <2 x i64>
658 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
662 ;SKX-LABEL: sext_2x32mem_to_2x64:
664 ;SKX-NEXT: vpmovsxdq (%rdi), %xmm0
666 define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
667 %a = load <2 x i32>,<2 x i32> *%i,align 1
668 %x = sext <2 x i32> %a to <2 x i64>
672 ;SKX-LABEL: zext_4x32mem_to_4x64:
674 ;SKX-NEXT: vpmovd2m %xmm0, %k1
675 ;SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z}
677 define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
678 %a = load <4 x i32>,<4 x i32> *%i,align 1
679 %x = zext <4 x i32> %a to <4 x i64>
680 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
684 ;SKX-LABEL: sext_4x32mem_to_4x64mask:
686 ;SKX-NEXT: vpmovd2m %xmm0, %k1
687 ;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
689 define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
690 %a = load <4 x i32>,<4 x i32> *%i,align 1
691 %x = sext <4 x i32> %a to <4 x i64>
692 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
696 ;SKX-LABEL: sext_4x32mem_to_4x64:
698 ;SKX-NEXT: vpmovsxdq (%rdi), %ymm0
700 define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
701 %a = load <4 x i32>,<4 x i32> *%i,align 1
702 %x = sext <4 x i32> %a to <4 x i64>
706 ;SKX-LABEL: sext_4x32_to_4x64:
708 ;SKX-NEXT: vpmovsxdq %xmm0, %ymm0
710 define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
711 %x = sext <4 x i32> %a to <4 x i64>
715 ;SKX-LABEL: zext_4x32_to_4x64mask:
717 ;SKX-NEXT: vpmovd2m %xmm1, %k1
718 ;SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z}
720 define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
721 %x = zext <4 x i32> %a to <4 x i64>
722 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
726 ;SKX-LABEL: zext_8x32mem_to_8x64:
728 ;SKX-NEXT: vpmovw2m %xmm0, %k1
729 ;SKX-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z}
731 define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
732 %a = load <8 x i32>,<8 x i32> *%i,align 1
733 %x = zext <8 x i32> %a to <8 x i64>
734 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
738 ;SKX-LABEL: sext_8x32mem_to_8x64mask:
740 ;SKX-NEXT: vpmovw2m %xmm0, %k1
741 ;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
743 define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
744 %a = load <8 x i32>,<8 x i32> *%i,align 1
745 %x = sext <8 x i32> %a to <8 x i64>
746 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
750 ;SKX-LABEL: sext_8x32mem_to_8x64:
751 ;KNL-LABEL: sext_8x32mem_to_8x64:
753 ;SKX-NEXT: vpmovsxdq (%rdi), %zmm0
754 ;KNL: vpmovsxdq (%rdi), %zmm0
756 define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
757 %a = load <8 x i32>,<8 x i32> *%i,align 1
758 %x = sext <8 x i32> %a to <8 x i64>
762 ;SKX-LABEL: sext_8x32_to_8x64:
763 ;KNL-LABEL: sext_8x32_to_8x64:
765 ;SKX-NEXT: vpmovsxdq %ymm0, %zmm0
766 ;KNL: vpmovsxdq %ymm0, %zmm0
768 define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
769 %x = sext <8 x i32> %a to <8 x i64>
773 ;SKX-LABEL: zext_8x32_to_8x64mask:
774 ;KNL-LABEL: zext_8x32_to_8x64mask:
776 ;SKX-NEXT: vpmovw2m %xmm1, %k1
777 ;SKX-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
778 ;KNL: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
780 define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
781 %x = zext <8 x i32> %a to <8 x i64>
782 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
785 ;KNL-LABEL: fptrunc_test:
786 ;KNL: vcvtpd2ps {{.*}}%zmm
788 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
789 %b = fptrunc <8 x double> %a to <8 x float>
793 ;KNL-LABEL: fpext_test:
794 ;KNL: vcvtps2pd {{.*}}%zmm
796 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
797 %b = fpext <8 x float> %a to <8 x double>
801 ; KNL-LABEL: zext_16i1_to_16xi32:
802 ; KNL: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
804 define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
805 %a = bitcast i16 %b to <16 x i1>
806 %c = zext <16 x i1> %a to <16 x i32>
810 ; KNL-LABEL: zext_8i1_to_8xi64:
811 ; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
813 define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
814 %a = bitcast i8 %b to <8 x i1>
815 %c = zext <8 x i1> %a to <8 x i64>
819 ; KNL-LABEL: trunc_16i8_to_16i1:
824 ; SKX-LABEL: trunc_16i8_to_16i1:
826 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
827 %mask_b = trunc <16 x i8>%a to <16 x i1>
828 %mask = bitcast <16 x i1> %mask_b to i16
832 ; KNL-LABEL: trunc_16i32_to_16i1:
836 ; SKX-LABEL: trunc_16i32_to_16i1:
838 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
839 %mask_b = trunc <16 x i32>%a to <16 x i1>
840 %mask = bitcast <16 x i1> %mask_b to i16
844 ; SKX-LABEL: trunc_4i32_to_4i1:
848 define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
849 %mask_a = trunc <4 x i32>%a to <4 x i1>
850 %mask_b = trunc <4 x i32>%b to <4 x i1>
851 %a_and_b = and <4 x i1>%mask_a, %mask_b
852 %res = sext <4 x i1>%a_and_b to <4 x i32>
856 ; KNL-LABEL: trunc_8i16_to_8i1:
858 ; KNL: vpandq LCP{{.*}}(%rip){1to8}
862 ; SKX-LABEL: trunc_8i16_to_8i1:
864 define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
865 %mask_b = trunc <8 x i16>%a to <8 x i1>
866 %mask = bitcast <8 x i1> %mask_b to i8
870 ; KNL-LABEL: sext_8i1_8i32:
871 ; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
874 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
875 %x = icmp slt <8 x i32> %a1, %a2
876 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
877 %y = sext <8 x i1> %x1 to <8 x i32>
882 ; KNL-LABEL: trunc_i32_to_i1:
884 ; KNL: kmovw %eax, %k1
886 define i16 @trunc_i32_to_i1(i32 %a) {
887 %a_i = trunc i32 %a to i1
888 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
889 %res = bitcast <16 x i1> %maskv to i16
893 ; KNL-LABEL: sext_8i1_8i16:
896 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
897 %x = icmp slt <8 x i32> %a1, %a2
898 %y = sext <8 x i1> %x to <8 x i16>
902 ; KNL-LABEL: sext_16i1_16i32:
905 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
906 %x = icmp slt <16 x i32> %a1, %a2
907 %y = sext <16 x i1> %x to <16 x i32>
911 ; KNL-LABEL: sext_8i1_8i64:
914 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
915 %x = icmp slt <8 x i32> %a1, %a2
916 %y = sext <8 x i1> %x to <8 x i64>
920 ; KNL-LABEL: extload_v8i64:
922 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
923 %sign_load = load <8 x i8>, <8 x i8>* %a
924 %c = sext <8 x i8> %sign_load to <8 x i64>
925 store <8 x i64> %c, <8 x i64>* %res
930 ;SKX: vmovdqu16 %zmm0, %zmm3 {%k1}
931 ;SKX-NEXT: kshiftrq $32, %k1, %k1
932 ;SKX-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1}
933 define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
934 %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer