1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
3 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
; test_pcmpeq_b: byte-wise equality compare of two <64 x i8> vectors through
; llvm.x86.avx512.mask.pcmpeq.b.512 with an all-ones (i64 -1) mask argument.
; AVX512BW prefix (x86_64 RUN line): expects vpcmpeqb into %k0, then kmovq to %rax.
; AVX512F-32 prefix (i386 RUN line): expects the 64-bit mask to be stored to the
; stack with kmovq and then loaded into %eax and %edx.
5 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
6 ; AVX512BW-LABEL: test_pcmpeq_b:
8 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
9 ; AVX512BW-NEXT: kmovq %k0, %rax
12 ; AVX512F-32-LABEL: test_pcmpeq_b:
14 ; AVX512F-32-NEXT: subl $12, %esp
15 ; AVX512F-32-NEXT: .Ltmp0:
16 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
17 ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
18 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
19 ; AVX512F-32-NEXT: movl (%esp), %eax
20 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
21 ; AVX512F-32-NEXT: addl $12, %esp
22 ; AVX512F-32-NEXT: retl
23 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
; test_mask_pcmpeq_b: same byte equality intrinsic as test_pcmpeq_b, but the
; i64 %mask function argument is forwarded as the intrinsic's mask operand.
; AVX512BW prefix: expects the mask to be moved from %rdi into %k1 and used as
; the write-mask of vpcmpeqb.
; AVX512F-32 prefix: expects %k1 to be built from two kmovd stack loads joined
; with kunpckdq, and the result to go through the stack into %eax and %edx.
27 define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
28 ; AVX512BW-LABEL: test_mask_pcmpeq_b:
30 ; AVX512BW-NEXT: kmovq %rdi, %k1
31 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
32 ; AVX512BW-NEXT: kmovq %k0, %rax
35 ; AVX512F-32-LABEL: test_mask_pcmpeq_b:
37 ; AVX512F-32-NEXT: subl $12, %esp
38 ; AVX512F-32-NEXT: .Ltmp1:
39 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
40 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
41 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
42 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
43 ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
44 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
45 ; AVX512F-32-NEXT: movl (%esp), %eax
46 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
47 ; AVX512F-32-NEXT: addl $12, %esp
48 ; AVX512F-32-NEXT: retl
49 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
53 declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
; test_pcmpeq_w: word-wise equality compare of two <32 x i16> vectors through
; llvm.x86.avx512.mask.pcmpeq.w.512 with an all-ones (i32 -1) mask argument.
; Both check prefixes expect vpcmpeqw into %k0 followed by kmovd %k0, %eax.
55 define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
56 ; AVX512BW-LABEL: test_pcmpeq_w:
58 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
59 ; AVX512BW-NEXT: kmovd %k0, %eax
62 ; AVX512F-32-LABEL: test_pcmpeq_w:
64 ; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
65 ; AVX512F-32-NEXT: kmovd %k0, %eax
66 ; AVX512F-32-NEXT: retl
67 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
; test_mask_pcmpeq_w: same word equality intrinsic as test_pcmpeq_w, but the
; i32 %mask function argument is forwarded as the intrinsic's mask operand.
; AVX512BW prefix: expects the mask to be moved from %edi into %k1.
; AVX512F-32 prefix: expects the mask to be loaded from the stack with kmovd.
; Both prefixes expect vpcmpeqw write-masked by %k1 and the result in %eax.
71 define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
72 ; AVX512BW-LABEL: test_mask_pcmpeq_w:
74 ; AVX512BW-NEXT: kmovd %edi, %k1
75 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
76 ; AVX512BW-NEXT: kmovd %k0, %eax
79 ; AVX512F-32-LABEL: test_mask_pcmpeq_w:
81 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
82 ; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
83 ; AVX512F-32-NEXT: kmovd %k0, %eax
84 ; AVX512F-32-NEXT: retl
85 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
89 declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
; test_pcmpgt_b: byte-wise greater-than compare of two <64 x i8> vectors through
; llvm.x86.avx512.mask.pcmpgt.b.512 with an all-ones (i64 -1) mask argument.
; AVX512BW prefix (x86_64 RUN line): expects vpcmpgtb into %k0, then kmovq to %rax.
; AVX512F-32 prefix (i386 RUN line): expects the 64-bit mask to be stored to the
; stack with kmovq and then loaded into %eax and %edx.
91 define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
92 ; AVX512BW-LABEL: test_pcmpgt_b:
94 ; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
95 ; AVX512BW-NEXT: kmovq %k0, %rax
98 ; AVX512F-32-LABEL: test_pcmpgt_b:
100 ; AVX512F-32-NEXT: subl $12, %esp
101 ; AVX512F-32-NEXT: .Ltmp2:
102 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
103 ; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
104 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
105 ; AVX512F-32-NEXT: movl (%esp), %eax
106 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
107 ; AVX512F-32-NEXT: addl $12, %esp
108 ; AVX512F-32-NEXT: retl
109 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
; test_mask_pcmpgt_b: same byte greater-than intrinsic as test_pcmpgt_b, but the
; i64 %mask function argument is forwarded as the intrinsic's mask operand.
; AVX512BW prefix: expects the mask to be moved from %rdi into %k1 and used as
; the write-mask of vpcmpgtb.
; AVX512F-32 prefix: expects %k1 to be built from two kmovd stack loads joined
; with kunpckdq, and the result to go through the stack into %eax and %edx.
113 define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
114 ; AVX512BW-LABEL: test_mask_pcmpgt_b:
116 ; AVX512BW-NEXT: kmovq %rdi, %k1
117 ; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
118 ; AVX512BW-NEXT: kmovq %k0, %rax
119 ; AVX512BW-NEXT: retq
121 ; AVX512F-32-LABEL: test_mask_pcmpgt_b:
122 ; AVX512F-32: # BB#0:
123 ; AVX512F-32-NEXT: subl $12, %esp
124 ; AVX512F-32-NEXT: .Ltmp3:
125 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
126 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
127 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
128 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
129 ; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
130 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
131 ; AVX512F-32-NEXT: movl (%esp), %eax
132 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
133 ; AVX512F-32-NEXT: addl $12, %esp
134 ; AVX512F-32-NEXT: retl
135 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
139 declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
; test_pcmpgt_w: word-wise greater-than compare of two <32 x i16> vectors through
; llvm.x86.avx512.mask.pcmpgt.w.512 with an all-ones (i32 -1) mask argument.
; Both check prefixes expect vpcmpgtw into %k0 followed by kmovd %k0, %eax.
141 define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
142 ; AVX512BW-LABEL: test_pcmpgt_w:
144 ; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
145 ; AVX512BW-NEXT: kmovd %k0, %eax
146 ; AVX512BW-NEXT: retq
148 ; AVX512F-32-LABEL: test_pcmpgt_w:
149 ; AVX512F-32: # BB#0:
150 ; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
151 ; AVX512F-32-NEXT: kmovd %k0, %eax
152 ; AVX512F-32-NEXT: retl
153 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
; test_mask_pcmpgt_w: same word greater-than intrinsic as test_pcmpgt_w, but the
; i32 %mask function argument is forwarded as the intrinsic's mask operand.
; AVX512BW prefix: expects the mask to be moved from %edi into %k1.
; AVX512F-32 prefix: expects the mask to be loaded from the stack with kmovd.
; Both prefixes expect vpcmpgtw write-masked by %k1 and the result in %eax.
157 define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
158 ; AVX512BW-LABEL: test_mask_pcmpgt_w:
160 ; AVX512BW-NEXT: kmovd %edi, %k1
161 ; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
162 ; AVX512BW-NEXT: kmovd %k0, %eax
163 ; AVX512BW-NEXT: retq
165 ; AVX512F-32-LABEL: test_mask_pcmpgt_w:
166 ; AVX512F-32: # BB#0:
167 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
168 ; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
169 ; AVX512F-32-NEXT: kmovd %k0, %eax
170 ; AVX512F-32-NEXT: retl
171 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
175 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
; test_cmp_b_512: calls llvm.x86.avx512.mask.cmp.b.512 on two <64 x i8> vectors
; eight times, with the i32 immediate operand set to 0 through 7 and an all-ones
; (i64 -1) mask, and chains the eight i64 results together with add.
; The CHECK lines expect, in call order: vpcmpeqb, vpcmpltb, vpcmpleb,
; vpcmpunordb, vpcmpneqb, vpcmpnltb, vpcmpnleb, vpcmpordb.
; AVX512BW prefix: each mask is moved to a 64-bit GPR with kmovq and summed with addq.
; AVX512F-32 prefix: each mask is stored to the stack with kmovq and summed as
; two 32-bit halves with addl/adcl into %eax and %edx.
177 define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
178 ; AVX512BW-LABEL: test_cmp_b_512:
180 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
181 ; AVX512BW-NEXT: kmovq %k0, %rax
182 ; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0
183 ; AVX512BW-NEXT: kmovq %k0, %rcx
184 ; AVX512BW-NEXT: addq %rax, %rcx
185 ; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0
186 ; AVX512BW-NEXT: kmovq %k0, %rax
187 ; AVX512BW-NEXT: addq %rcx, %rax
188 ; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
189 ; AVX512BW-NEXT: kmovq %k0, %rcx
190 ; AVX512BW-NEXT: addq %rax, %rcx
191 ; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
192 ; AVX512BW-NEXT: kmovq %k0, %rax
193 ; AVX512BW-NEXT: addq %rcx, %rax
194 ; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
195 ; AVX512BW-NEXT: kmovq %k0, %rcx
196 ; AVX512BW-NEXT: addq %rax, %rcx
197 ; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
198 ; AVX512BW-NEXT: kmovq %k0, %rdx
199 ; AVX512BW-NEXT: addq %rcx, %rdx
200 ; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0
201 ; AVX512BW-NEXT: kmovq %k0, %rax
202 ; AVX512BW-NEXT: addq %rdx, %rax
203 ; AVX512BW-NEXT: retq
205 ; AVX512F-32-LABEL: test_cmp_b_512:
206 ; AVX512F-32: # BB#0:
207 ; AVX512F-32-NEXT: subl $68, %esp
208 ; AVX512F-32-NEXT: .Ltmp4:
209 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
210 ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
211 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
212 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
213 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
214 ; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0
215 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
216 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
217 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
218 ; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0
219 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
220 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
221 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
222 ; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
223 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
224 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
225 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
226 ; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
227 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
228 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
229 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
230 ; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
231 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
232 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
233 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
234 ; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
235 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
236 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
237 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
238 ; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0
239 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
240 ; AVX512F-32-NEXT: addl (%esp), %eax
241 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
242 ; AVX512F-32-NEXT: addl $68, %esp
243 ; AVX512F-32-NEXT: retl
244 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
245 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
246 %ret1 = add i64 %res0, %res1
247 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
248 %ret2 = add i64 %ret1, %res2
249 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
250 %ret3 = add i64 %ret2, %res3
251 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
252 %ret4 = add i64 %ret3, %res4
253 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
254 %ret5 = add i64 %ret4, %res5
255 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
256 %ret6 = add i64 %ret5, %res6
257 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
258 %ret7 = add i64 %ret6, %res7
; test_mask_cmp_b_512: masked variant of test_cmp_b_512. Calls
; llvm.x86.avx512.mask.cmp.b.512 eight times with the i32 immediate operand set
; to 0 through 7, forwarding the i64 %mask argument as the mask operand, and
; chains the eight i64 results together with add.
; The CHECK lines expect, in call order: vpcmpeqb, vpcmpltb, vpcmpleb,
; vpcmpunordb, vpcmpneqb, vpcmpnltb, vpcmpnleb, vpcmpordb, each write-masked by %k1.
; AVX512BW prefix: %k1 is loaded once from %rdi with kmovq.
; AVX512F-32 prefix: %k1 is built once from two kmovd stack loads joined with
; kunpckdq; results are summed as 32-bit halves with addl/adcl.
262 define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
263 ; AVX512BW-LABEL: test_mask_cmp_b_512:
265 ; AVX512BW-NEXT: kmovq %rdi, %k1
266 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
267 ; AVX512BW-NEXT: kmovq %k0, %rax
268 ; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
269 ; AVX512BW-NEXT: kmovq %k0, %rcx
270 ; AVX512BW-NEXT: addq %rax, %rcx
271 ; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
272 ; AVX512BW-NEXT: kmovq %k0, %rax
273 ; AVX512BW-NEXT: addq %rcx, %rax
274 ; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
275 ; AVX512BW-NEXT: kmovq %k0, %rcx
276 ; AVX512BW-NEXT: addq %rax, %rcx
277 ; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
278 ; AVX512BW-NEXT: kmovq %k0, %rax
279 ; AVX512BW-NEXT: addq %rcx, %rax
280 ; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
281 ; AVX512BW-NEXT: kmovq %k0, %rcx
282 ; AVX512BW-NEXT: addq %rax, %rcx
283 ; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
284 ; AVX512BW-NEXT: kmovq %k0, %rdx
285 ; AVX512BW-NEXT: addq %rcx, %rdx
286 ; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
287 ; AVX512BW-NEXT: kmovq %k0, %rax
288 ; AVX512BW-NEXT: addq %rdx, %rax
289 ; AVX512BW-NEXT: retq
291 ; AVX512F-32-LABEL: test_mask_cmp_b_512:
292 ; AVX512F-32: # BB#0:
293 ; AVX512F-32-NEXT: subl $68, %esp
294 ; AVX512F-32-NEXT: .Ltmp5:
295 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
296 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
297 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
298 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
299 ; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
300 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
301 ; AVX512F-32-NEXT: movl (%esp), %eax
302 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
303 ; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
304 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
305 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
306 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
307 ; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
308 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
309 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
310 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
311 ; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
312 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
313 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
314 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
315 ; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
316 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
317 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
318 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
319 ; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
320 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
321 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
322 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
323 ; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
324 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
325 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
326 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
327 ; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
328 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
329 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
330 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
331 ; AVX512F-32-NEXT: addl $68, %esp
332 ; AVX512F-32-NEXT: retl
333 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
334 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
335 %ret1 = add i64 %res0, %res1
336 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
337 %ret2 = add i64 %ret1, %res2
338 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
339 %ret3 = add i64 %ret2, %res3
340 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
341 %ret4 = add i64 %ret3, %res4
342 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
343 %ret5 = add i64 %ret4, %res5
344 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
345 %ret6 = add i64 %ret5, %res6
346 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
347 %ret7 = add i64 %ret6, %res7
351 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
; test_ucmp_b_512: calls llvm.x86.avx512.mask.ucmp.b.512 on two <64 x i8> vectors
; eight times, with the i32 immediate operand set to 0 through 7 and an all-ones
; (i64 -1) mask, and chains the eight i64 results together with add.
; The CHECK lines expect, in call order: vpcmpequb, vpcmpltub, vpcmpleub,
; vpcmpunordub, vpcmpnequb, vpcmpnltub, vpcmpnleub, vpcmpordub.
; AVX512BW prefix: each mask is moved to a 64-bit GPR with kmovq and summed with addq.
; AVX512F-32 prefix: each mask is stored to the stack with kmovq and summed as
; two 32-bit halves with addl/adcl into %eax and %edx.
353 define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
354 ; AVX512BW-LABEL: test_ucmp_b_512:
356 ; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0
357 ; AVX512BW-NEXT: kmovq %k0, %rax
358 ; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0
359 ; AVX512BW-NEXT: kmovq %k0, %rcx
360 ; AVX512BW-NEXT: addq %rax, %rcx
361 ; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0
362 ; AVX512BW-NEXT: kmovq %k0, %rax
363 ; AVX512BW-NEXT: addq %rcx, %rax
364 ; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
365 ; AVX512BW-NEXT: kmovq %k0, %rcx
366 ; AVX512BW-NEXT: addq %rax, %rcx
367 ; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
368 ; AVX512BW-NEXT: kmovq %k0, %rax
369 ; AVX512BW-NEXT: addq %rcx, %rax
370 ; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
371 ; AVX512BW-NEXT: kmovq %k0, %rcx
372 ; AVX512BW-NEXT: addq %rax, %rcx
373 ; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
374 ; AVX512BW-NEXT: kmovq %k0, %rdx
375 ; AVX512BW-NEXT: addq %rcx, %rdx
376 ; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0
377 ; AVX512BW-NEXT: kmovq %k0, %rax
378 ; AVX512BW-NEXT: addq %rdx, %rax
379 ; AVX512BW-NEXT: retq
381 ; AVX512F-32-LABEL: test_ucmp_b_512:
382 ; AVX512F-32: # BB#0:
383 ; AVX512F-32-NEXT: subl $68, %esp
384 ; AVX512F-32-NEXT: .Ltmp6:
385 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
386 ; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0
387 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
388 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
389 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
390 ; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0
391 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
392 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
393 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
394 ; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0
395 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
396 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
397 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
398 ; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
399 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
400 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
401 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
402 ; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
403 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
404 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
405 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
406 ; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
407 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
408 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
409 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
410 ; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
411 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
412 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
413 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
414 ; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0
415 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
416 ; AVX512F-32-NEXT: addl (%esp), %eax
417 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
418 ; AVX512F-32-NEXT: addl $68, %esp
419 ; AVX512F-32-NEXT: retl
420 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
421 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
422 %ret1 = add i64 %res0, %res1
423 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
424 %ret2 = add i64 %ret1, %res2
425 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
426 %ret3 = add i64 %ret2, %res3
427 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
428 %ret4 = add i64 %ret3, %res4
429 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
430 %ret5 = add i64 %ret4, %res5
431 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
432 %ret6 = add i64 %ret5, %res6
433 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
434 %ret7 = add i64 %ret6, %res7
; test_mask_x86_avx512_ucmp_b_512: masked variant of test_ucmp_b_512. Calls
; llvm.x86.avx512.mask.ucmp.b.512 eight times with the i32 immediate operand set
; to 0 through 7, forwarding the i64 %mask argument as the mask operand, and
; chains the eight i64 results together with add.
; The CHECK lines expect, in call order: vpcmpequb, vpcmpltub, vpcmpleub,
; vpcmpunordub, vpcmpnequb, vpcmpnltub, vpcmpnleub, vpcmpordub, each
; write-masked by %k1.
; AVX512BW prefix: %k1 is loaded once from %rdi with kmovq.
; AVX512F-32 prefix: %k1 is built once from two kmovd stack loads joined with
; kunpckdq; results are summed as 32-bit halves with addl/adcl.
438 define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
439 ; AVX512BW-LABEL: test_mask_x86_avx512_ucmp_b_512:
441 ; AVX512BW-NEXT: kmovq %rdi, %k1
442 ; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
443 ; AVX512BW-NEXT: kmovq %k0, %rax
444 ; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
445 ; AVX512BW-NEXT: kmovq %k0, %rcx
446 ; AVX512BW-NEXT: addq %rax, %rcx
447 ; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
448 ; AVX512BW-NEXT: kmovq %k0, %rax
449 ; AVX512BW-NEXT: addq %rcx, %rax
450 ; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
451 ; AVX512BW-NEXT: kmovq %k0, %rcx
452 ; AVX512BW-NEXT: addq %rax, %rcx
453 ; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
454 ; AVX512BW-NEXT: kmovq %k0, %rax
455 ; AVX512BW-NEXT: addq %rcx, %rax
456 ; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
457 ; AVX512BW-NEXT: kmovq %k0, %rcx
458 ; AVX512BW-NEXT: addq %rax, %rcx
459 ; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
460 ; AVX512BW-NEXT: kmovq %k0, %rdx
461 ; AVX512BW-NEXT: addq %rcx, %rdx
462 ; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
463 ; AVX512BW-NEXT: kmovq %k0, %rax
464 ; AVX512BW-NEXT: addq %rdx, %rax
465 ; AVX512BW-NEXT: retq
467 ; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
468 ; AVX512F-32: # BB#0:
469 ; AVX512F-32-NEXT: subl $68, %esp
470 ; AVX512F-32-NEXT: .Ltmp7:
471 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
472 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
473 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
474 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
475 ; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
476 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
477 ; AVX512F-32-NEXT: movl (%esp), %eax
478 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
479 ; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
480 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
481 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
482 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
483 ; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
484 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
485 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
486 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
487 ; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
488 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
489 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
490 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
491 ; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
492 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
493 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
494 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
495 ; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
496 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
497 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
498 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
499 ; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
500 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
501 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
502 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
503 ; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
504 ; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
505 ; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
506 ; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
507 ; AVX512F-32-NEXT: addl $68, %esp
508 ; AVX512F-32-NEXT: retl
509 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
510 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
511 %ret1 = add i64 %res0, %res1
512 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
513 %ret2 = add i64 %ret1, %res2
514 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
515 %ret3 = add i64 %ret2, %res3
516 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
517 %ret4 = add i64 %ret3, %res4
518 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
519 %ret5 = add i64 %ret4, %res5
520 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
521 %ret6 = add i64 %ret5, %res6
522 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
523 %ret7 = add i64 %ret6, %res7
527 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
; test_cmp_w_512: calls llvm.x86.avx512.mask.cmp.w.512 on two <32 x i16> vectors
; eight times, with the first i32 immediate operand set to 0 through 7 and an
; all-ones (i32 -1) mask, and chains the eight i32 results together with add.
; Both check prefixes expect, in call order: vpcmpeqw, vpcmpltw, vpcmplew,
; vpcmpunordw, vpcmpneqw, vpcmpnltw, vpcmpnlew, vpcmpordw, each followed by a
; kmovd of %k0 into a 32-bit GPR, with the running sum formed by addl.
529 define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
530 ; AVX512BW-LABEL: test_cmp_w_512:
532 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
533 ; AVX512BW-NEXT: kmovd %k0, %eax
534 ; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0
535 ; AVX512BW-NEXT: kmovd %k0, %ecx
536 ; AVX512BW-NEXT: addl %eax, %ecx
537 ; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0
538 ; AVX512BW-NEXT: kmovd %k0, %eax
539 ; AVX512BW-NEXT: addl %ecx, %eax
540 ; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
541 ; AVX512BW-NEXT: kmovd %k0, %ecx
542 ; AVX512BW-NEXT: addl %eax, %ecx
543 ; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
544 ; AVX512BW-NEXT: kmovd %k0, %eax
545 ; AVX512BW-NEXT: addl %ecx, %eax
546 ; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
547 ; AVX512BW-NEXT: kmovd %k0, %ecx
548 ; AVX512BW-NEXT: addl %eax, %ecx
549 ; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
550 ; AVX512BW-NEXT: kmovd %k0, %edx
551 ; AVX512BW-NEXT: addl %ecx, %edx
552 ; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0
553 ; AVX512BW-NEXT: kmovd %k0, %eax
554 ; AVX512BW-NEXT: addl %edx, %eax
555 ; AVX512BW-NEXT: retq
557 ; AVX512F-32-LABEL: test_cmp_w_512:
558 ; AVX512F-32: # BB#0:
559 ; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
560 ; AVX512F-32-NEXT: kmovd %k0, %eax
561 ; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0
562 ; AVX512F-32-NEXT: kmovd %k0, %ecx
563 ; AVX512F-32-NEXT: addl %eax, %ecx
564 ; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0
565 ; AVX512F-32-NEXT: kmovd %k0, %eax
566 ; AVX512F-32-NEXT: addl %ecx, %eax
567 ; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
568 ; AVX512F-32-NEXT: kmovd %k0, %ecx
569 ; AVX512F-32-NEXT: addl %eax, %ecx
570 ; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
571 ; AVX512F-32-NEXT: kmovd %k0, %eax
572 ; AVX512F-32-NEXT: addl %ecx, %eax
573 ; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
574 ; AVX512F-32-NEXT: kmovd %k0, %ecx
575 ; AVX512F-32-NEXT: addl %eax, %ecx
576 ; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
577 ; AVX512F-32-NEXT: kmovd %k0, %edx
578 ; AVX512F-32-NEXT: addl %ecx, %edx
579 ; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0
580 ; AVX512F-32-NEXT: kmovd %k0, %eax
581 ; AVX512F-32-NEXT: addl %edx, %eax
582 ; AVX512F-32-NEXT: retl
583 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
584 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
585 %ret1 = add i32 %res0, %res1
586 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
587 %ret2 = add i32 %ret1, %res2
588 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
589 %ret3 = add i32 %ret2, %res3
590 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
591 %ret4 = add i32 %ret3, %res4
592 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
593 %ret5 = add i32 %ret4, %res5
594 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
595 %ret6 = add i32 %ret5, %res6
596 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
597 %ret7 = add i32 %ret6, %res7
; test_mask_cmp_w_512: masked variant of test_cmp_w_512. Calls
; llvm.x86.avx512.mask.cmp.w.512 eight times with the first i32 immediate
; operand set to 0 through 7, forwarding the i32 %mask argument as the mask
; operand, and chains the eight i32 results together with add.
; Both check prefixes expect, in call order: vpcmpeqw, vpcmpltw, vpcmplew,
; vpcmpunordw, vpcmpneqw, vpcmpnltw, vpcmpnlew, vpcmpordw, each write-masked by %k1.
; AVX512BW prefix: %k1 is loaded once from %edi with kmovd.
; AVX512F-32 prefix: %k1 is loaded once from the stack with kmovd.
601 define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
602 ; AVX512BW-LABEL: test_mask_cmp_w_512:
604 ; AVX512BW-NEXT: kmovd %edi, %k1
605 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
606 ; AVX512BW-NEXT: kmovd %k0, %eax
607 ; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
608 ; AVX512BW-NEXT: kmovd %k0, %ecx
609 ; AVX512BW-NEXT: addl %eax, %ecx
610 ; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
611 ; AVX512BW-NEXT: kmovd %k0, %eax
612 ; AVX512BW-NEXT: addl %ecx, %eax
613 ; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
614 ; AVX512BW-NEXT: kmovd %k0, %ecx
615 ; AVX512BW-NEXT: addl %eax, %ecx
616 ; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
617 ; AVX512BW-NEXT: kmovd %k0, %eax
618 ; AVX512BW-NEXT: addl %ecx, %eax
619 ; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
620 ; AVX512BW-NEXT: kmovd %k0, %ecx
621 ; AVX512BW-NEXT: addl %eax, %ecx
622 ; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
623 ; AVX512BW-NEXT: kmovd %k0, %edx
624 ; AVX512BW-NEXT: addl %ecx, %edx
625 ; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
626 ; AVX512BW-NEXT: kmovd %k0, %eax
627 ; AVX512BW-NEXT: addl %edx, %eax
628 ; AVX512BW-NEXT: retq
630 ; AVX512F-32-LABEL: test_mask_cmp_w_512:
631 ; AVX512F-32: # BB#0:
632 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
633 ; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
634 ; AVX512F-32-NEXT: kmovd %k0, %eax
635 ; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
636 ; AVX512F-32-NEXT: kmovd %k0, %ecx
637 ; AVX512F-32-NEXT: addl %eax, %ecx
638 ; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
639 ; AVX512F-32-NEXT: kmovd %k0, %eax
640 ; AVX512F-32-NEXT: addl %ecx, %eax
641 ; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
642 ; AVX512F-32-NEXT: kmovd %k0, %ecx
643 ; AVX512F-32-NEXT: addl %eax, %ecx
644 ; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
645 ; AVX512F-32-NEXT: kmovd %k0, %eax
646 ; AVX512F-32-NEXT: addl %ecx, %eax
647 ; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
648 ; AVX512F-32-NEXT: kmovd %k0, %ecx
649 ; AVX512F-32-NEXT: addl %eax, %ecx
650 ; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
651 ; AVX512F-32-NEXT: kmovd %k0, %edx
652 ; AVX512F-32-NEXT: addl %ecx, %edx
653 ; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
654 ; AVX512F-32-NEXT: kmovd %k0, %eax
655 ; AVX512F-32-NEXT: addl %edx, %eax
656 ; AVX512F-32-NEXT: retl
657 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
658 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
659 %ret1 = add i32 %res0, %res1
660 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
661 %ret2 = add i32 %ret1, %res2
662 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
663 %ret3 = add i32 %ret2, %res3
664 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
665 %ret4 = add i32 %ret3, %res4
666 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
667 %ret5 = add i32 %ret4, %res5
668 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
669 %ret6 = add i32 %ret5, %res6
670 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
671 %ret7 = add i32 %ret6, %res7
675 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
; test_ucmp_w_512: calls llvm.x86.avx512.mask.ucmp.w.512 on two <32 x i16> vectors
; eight times, with the first i32 immediate operand set to 0 through 7 and an
; all-ones (i32 -1) mask, and chains the eight i32 results together with add.
; Both check prefixes expect, in call order: vpcmpequw, vpcmpltuw, vpcmpleuw,
; vpcmpunorduw, vpcmpnequw, vpcmpnltuw, vpcmpnleuw, vpcmporduw, each followed by
; a kmovd of %k0 into a 32-bit GPR, with the running sum formed by addl.
677 define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
678 ; AVX512BW-LABEL: test_ucmp_w_512:
680 ; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0
681 ; AVX512BW-NEXT: kmovd %k0, %eax
682 ; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
683 ; AVX512BW-NEXT: kmovd %k0, %ecx
684 ; AVX512BW-NEXT: addl %eax, %ecx
685 ; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
686 ; AVX512BW-NEXT: kmovd %k0, %eax
687 ; AVX512BW-NEXT: addl %ecx, %eax
688 ; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
689 ; AVX512BW-NEXT: kmovd %k0, %ecx
690 ; AVX512BW-NEXT: addl %eax, %ecx
691 ; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
692 ; AVX512BW-NEXT: kmovd %k0, %eax
693 ; AVX512BW-NEXT: addl %ecx, %eax
694 ; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
695 ; AVX512BW-NEXT: kmovd %k0, %ecx
696 ; AVX512BW-NEXT: addl %eax, %ecx
697 ; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
698 ; AVX512BW-NEXT: kmovd %k0, %edx
699 ; AVX512BW-NEXT: addl %ecx, %edx
700 ; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0
701 ; AVX512BW-NEXT: kmovd %k0, %eax
702 ; AVX512BW-NEXT: addl %edx, %eax
703 ; AVX512BW-NEXT: retq
705 ; AVX512F-32-LABEL: test_ucmp_w_512:
706 ; AVX512F-32: # BB#0:
707 ; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0
708 ; AVX512F-32-NEXT: kmovd %k0, %eax
709 ; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
710 ; AVX512F-32-NEXT: kmovd %k0, %ecx
711 ; AVX512F-32-NEXT: addl %eax, %ecx
712 ; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
713 ; AVX512F-32-NEXT: kmovd %k0, %eax
714 ; AVX512F-32-NEXT: addl %ecx, %eax
715 ; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
716 ; AVX512F-32-NEXT: kmovd %k0, %ecx
717 ; AVX512F-32-NEXT: addl %eax, %ecx
718 ; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
719 ; AVX512F-32-NEXT: kmovd %k0, %eax
720 ; AVX512F-32-NEXT: addl %ecx, %eax
721 ; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
722 ; AVX512F-32-NEXT: kmovd %k0, %ecx
723 ; AVX512F-32-NEXT: addl %eax, %ecx
724 ; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
725 ; AVX512F-32-NEXT: kmovd %k0, %edx
726 ; AVX512F-32-NEXT: addl %ecx, %edx
727 ; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0
728 ; AVX512F-32-NEXT: kmovd %k0, %eax
729 ; AVX512F-32-NEXT: addl %edx, %eax
730 ; AVX512F-32-NEXT: retl
731 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
732 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
733 %ret1 = add i32 %res0, %res1
734 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
735 %ret2 = add i32 %ret1, %res2
736 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
737 %ret3 = add i32 %ret2, %res3
738 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
739 %ret4 = add i32 %ret3, %res4
740 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
741 %ret5 = add i32 %ret4, %res5
742 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
743 %ret6 = add i32 %ret5, %res6
744 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
745 %ret7 = add i32 %ret6, %res7
; Masked unsigned word compare: calls @llvm.x86.avx512.mask.ucmp.w.512 on
; %a0/%a1 once per predicate immediate 0 through 7, each time under the
; caller-supplied i32 %mask, and accumulates the eight i32 results with adds.
; Expected code loads the mask into k1 once (from edi on x86_64, from the stack
; on i386) and emits vpcmp{eq,lt,le,unord,neq,nlt,nle,ord}uw, each with {%k1}.
749 define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
750 ; AVX512BW-LABEL: test_mask_ucmp_w_512:
752 ; AVX512BW-NEXT: kmovd %edi, %k1
753 ; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
754 ; AVX512BW-NEXT: kmovd %k0, %eax
755 ; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
756 ; AVX512BW-NEXT: kmovd %k0, %ecx
757 ; AVX512BW-NEXT: addl %eax, %ecx
758 ; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
759 ; AVX512BW-NEXT: kmovd %k0, %eax
760 ; AVX512BW-NEXT: addl %ecx, %eax
761 ; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
762 ; AVX512BW-NEXT: kmovd %k0, %ecx
763 ; AVX512BW-NEXT: addl %eax, %ecx
764 ; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
765 ; AVX512BW-NEXT: kmovd %k0, %eax
766 ; AVX512BW-NEXT: addl %ecx, %eax
767 ; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
768 ; AVX512BW-NEXT: kmovd %k0, %ecx
769 ; AVX512BW-NEXT: addl %eax, %ecx
770 ; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
771 ; AVX512BW-NEXT: kmovd %k0, %edx
772 ; AVX512BW-NEXT: addl %ecx, %edx
773 ; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
774 ; AVX512BW-NEXT: kmovd %k0, %eax
775 ; AVX512BW-NEXT: addl %edx, %eax
776 ; AVX512BW-NEXT: retq
778 ; AVX512F-32-LABEL: test_mask_ucmp_w_512:
779 ; AVX512F-32: # BB#0:
780 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
781 ; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
782 ; AVX512F-32-NEXT: kmovd %k0, %eax
783 ; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
784 ; AVX512F-32-NEXT: kmovd %k0, %ecx
785 ; AVX512F-32-NEXT: addl %eax, %ecx
786 ; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
787 ; AVX512F-32-NEXT: kmovd %k0, %eax
788 ; AVX512F-32-NEXT: addl %ecx, %eax
789 ; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
790 ; AVX512F-32-NEXT: kmovd %k0, %ecx
791 ; AVX512F-32-NEXT: addl %eax, %ecx
792 ; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
793 ; AVX512F-32-NEXT: kmovd %k0, %eax
794 ; AVX512F-32-NEXT: addl %ecx, %eax
795 ; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
796 ; AVX512F-32-NEXT: kmovd %k0, %ecx
797 ; AVX512F-32-NEXT: addl %eax, %ecx
798 ; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
799 ; AVX512F-32-NEXT: kmovd %k0, %edx
800 ; AVX512F-32-NEXT: addl %ecx, %edx
801 ; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
802 ; AVX512F-32-NEXT: kmovd %k0, %eax
803 ; AVX512F-32-NEXT: addl %edx, %eax
804 ; AVX512F-32-NEXT: retl
805 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
806 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
807 %ret1 = add i32 %res0, %res1
808 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
809 %ret2 = add i32 %ret1, %res2
810 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
811 %ret3 = add i32 %ret2, %res3
812 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
813 %ret4 = add i32 %ret3, %res4
814 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
815 %ret5 = add i32 %ret4, %res5
816 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
817 %ret6 = add i32 %ret5, %res6
818 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
819 %ret7 = add i32 %ret6, %res7
823 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
825 declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly
; Word blend under an i32 mask: calls @llvm.x86.avx512.mask.blend.w.512 with
; (%a1, %a2, %mask). Both targets are expected to move the mask into k1 (from
; edi on x86_64, from the stack on i386) and emit a single vpblendmw {%k1}.
827 define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
828 ; AVX512BW-LABEL: test_x86_mask_blend_w_512:
830 ; AVX512BW-NEXT: kmovd %edi, %k1
831 ; AVX512BW-NEXT: vpblendmw %zmm1, %zmm0, %zmm0 {%k1}
832 ; AVX512BW-NEXT: retq
834 ; AVX512F-32-LABEL: test_x86_mask_blend_w_512:
835 ; AVX512F-32: # BB#0:
836 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
837 ; AVX512F-32-NEXT: vpblendmw %zmm1, %zmm0, %zmm0 {%k1}
838 ; AVX512F-32-NEXT: retl
839 %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
842 declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly
; Byte blend under an i64 mask: calls @llvm.x86.avx512.mask.blend.b.512 with
; (%a1, %a2, %a0), where %a0 is the mask. x86_64 is expected to use one kmovq
; from rdi; i386 is expected to load two 32-bit halves from the stack with
; kmovd and join them with kunpckdq before the vpblendmb {%k1}.
844 define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
845 ; AVX512BW-LABEL: test_x86_mask_blend_b_512:
847 ; AVX512BW-NEXT: kmovq %rdi, %k1
848 ; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1}
849 ; AVX512BW-NEXT: retq
851 ; AVX512F-32-LABEL: test_x86_mask_blend_b_512:
852 ; AVX512F-32: # BB#0:
853 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
854 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
855 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
856 ; AVX512F-32-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1}
857 ; AVX512F-32-NEXT: retl
858 %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
; packssdw, register-register, unmasked: calls
; @llvm.x86.avx512.mask.packssdw.512 on %a and %b with a zeroinitializer
; pass-through and an all-ones mask (i32 -1). Both targets are expected to
; emit a plain vpackssdw with no mask register.
862 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
863 ; AVX512BW-LABEL: test_mask_packs_epi32_rr_512:
865 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
866 ; AVX512BW-NEXT: retq
868 ; AVX512F-32-LABEL: test_mask_packs_epi32_rr_512:
869 ; AVX512F-32: # BB#0:
870 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
871 ; AVX512F-32-NEXT: retl
872 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packssdw, register-register, merge-masked: the intrinsic receives
; %passThru as pass-through and the variable i32 %mask. Expected code moves
; the mask into k1, emits vpackssdw into zmm2 under {%k1}, then copies zmm2
; to zmm0 with vmovaps.
876 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
877 ; AVX512BW-LABEL: test_mask_packs_epi32_rrk_512:
879 ; AVX512BW-NEXT: kmovd %edi, %k1
880 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
881 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
882 ; AVX512BW-NEXT: retq
884 ; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512:
885 ; AVX512F-32: # BB#0:
886 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
887 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
888 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
889 ; AVX512F-32-NEXT: retl
890 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packssdw, register-register, zero-masked: the intrinsic receives a
; zeroinitializer pass-through together with the variable i32 %mask.
; Expected code is a vpackssdw carrying both {%k1} and {z}.
894 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
895 ; AVX512BW-LABEL: test_mask_packs_epi32_rrkz_512:
897 ; AVX512BW-NEXT: kmovd %edi, %k1
898 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
899 ; AVX512BW-NEXT: retq
901 ; AVX512F-32-LABEL: test_mask_packs_epi32_rrkz_512:
902 ; AVX512F-32: # BB#0:
903 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
904 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
905 ; AVX512F-32-NEXT: retl
906 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; packssdw, register-memory, unmasked: %b is loaded from %ptr_b and the mask
; is all-ones (i32 -1). The checks expect the load to appear as the memory
; operand of vpackssdw ((%rdi) on x86_64, (%eax) on i386).
910 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
911 ; AVX512BW-LABEL: test_mask_packs_epi32_rm_512:
913 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0
914 ; AVX512BW-NEXT: retq
916 ; AVX512F-32-LABEL: test_mask_packs_epi32_rm_512:
917 ; AVX512F-32: # BB#0:
918 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
919 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0
920 ; AVX512F-32-NEXT: retl
921 %b = load <16 x i32>, <16 x i32>* %ptr_b
922 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packssdw, register-memory, merge-masked: %b is loaded from %ptr_b, with
; %passThru as pass-through and the variable i32 %mask. Expected code is a
; memory-operand vpackssdw into zmm1 under {%k1}, then vmovaps to zmm0.
926 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
927 ; AVX512BW-LABEL: test_mask_packs_epi32_rmk_512:
929 ; AVX512BW-NEXT: kmovd %esi, %k1
930 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1}
931 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
932 ; AVX512BW-NEXT: retq
934 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512:
935 ; AVX512F-32: # BB#0:
936 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
937 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
938 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1}
939 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
940 ; AVX512F-32-NEXT: retl
941 %b = load <16 x i32>, <16 x i32>* %ptr_b
942 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packssdw, register-memory, zero-masked: %b is loaded from %ptr_b, with a
; zeroinitializer pass-through and the variable i32 %mask. Expected code is a
; memory-operand vpackssdw carrying {%k1} {z}.
946 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
947 ; AVX512BW-LABEL: test_mask_packs_epi32_rmkz_512:
949 ; AVX512BW-NEXT: kmovd %esi, %k1
950 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z}
951 ; AVX512BW-NEXT: retq
953 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmkz_512:
954 ; AVX512F-32: # BB#0:
955 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
956 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
957 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z}
958 ; AVX512F-32-NEXT: retl
959 %b = load <16 x i32>, <16 x i32>* %ptr_b
960 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; packssdw, broadcast-memory, unmasked: a scalar i32 loaded from %ptr_b is
; inserted into lane 0 and splatted with a zeroinitializer shuffle mask to
; form %b; the intrinsic mask is all-ones (i32 -1). The checks expect the
; splat to appear as a {1to16} memory operand of vpackssdw.
964 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
965 ; AVX512BW-LABEL: test_mask_packs_epi32_rmb_512:
967 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0
968 ; AVX512BW-NEXT: retq
970 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmb_512:
971 ; AVX512F-32: # BB#0:
972 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
973 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0
974 ; AVX512F-32-NEXT: retl
975 %q = load i32, i32* %ptr_b
976 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
977 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
978 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packssdw, broadcast-memory, merge-masked: %b is a splat of the i32 loaded
; from %ptr_b (insertelement + zeroinitializer shuffle); %passThru is the
; pass-through and %mask the variable i32 mask. Expected code is a {1to16}
; vpackssdw into zmm1 under {%k1}, then vmovaps to zmm0.
982 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
983 ; AVX512BW-LABEL: test_mask_packs_epi32_rmbk_512:
985 ; AVX512BW-NEXT: kmovd %esi, %k1
986 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
987 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
988 ; AVX512BW-NEXT: retq
990 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512:
991 ; AVX512F-32: # BB#0:
992 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
993 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
994 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
995 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
996 ; AVX512F-32-NEXT: retl
997 %q = load i32, i32* %ptr_b
998 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
999 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1000 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packssdw, broadcast-memory, zero-masked: %b is a splat of the i32 loaded
; from %ptr_b; the pass-through is zeroinitializer and %mask is the variable
; i32 mask. Expected code is a {1to16} vpackssdw carrying {%k1} {z}.
1004 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
1005 ; AVX512BW-LABEL: test_mask_packs_epi32_rmbkz_512:
1006 ; AVX512BW: ## BB#0:
1007 ; AVX512BW-NEXT: kmovd %esi, %k1
1008 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
1009 ; AVX512BW-NEXT: retq
1011 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmbkz_512:
1012 ; AVX512F-32: # BB#0:
1013 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1014 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1015 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
1016 ; AVX512F-32-NEXT: retl
1017 %q = load i32, i32* %ptr_b
1018 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1019 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1020 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1024 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
; packsswb, register-register, unmasked: calls
; @llvm.x86.avx512.mask.packsswb.512 on %a and %b with a zeroinitializer
; pass-through and an all-ones i64 mask (-1). Both targets are expected to
; emit a plain vpacksswb with no mask register.
1026 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1027 ; AVX512BW-LABEL: test_mask_packs_epi16_rr_512:
1028 ; AVX512BW: ## BB#0:
1029 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0
1030 ; AVX512BW-NEXT: retq
1032 ; AVX512F-32-LABEL: test_mask_packs_epi16_rr_512:
1033 ; AVX512F-32: # BB#0:
1034 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0
1035 ; AVX512F-32-NEXT: retl
1036 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; packsswb, register-register, merge-masked with an i64 %mask and %passThru.
; x86_64 is expected to load the mask with one kmovq; i386 is expected to
; load two 32-bit halves with kmovd and join them with kunpckdq. The
; vpacksswb then writes zmm2 under {%k1} and vmovaps copies it to zmm0.
1040 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
1041 ; AVX512BW-LABEL: test_mask_packs_epi16_rrk_512:
1042 ; AVX512BW: ## BB#0:
1043 ; AVX512BW-NEXT: kmovq %rdi, %k1
1044 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
1045 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1046 ; AVX512BW-NEXT: retq
1048 ; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512:
1049 ; AVX512F-32: # BB#0:
1050 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1051 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1052 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1053 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
1054 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1055 ; AVX512F-32-NEXT: retl
1056 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; packsswb, register-register, zero-masked: zeroinitializer pass-through
; with a variable i64 %mask. Expected code builds k1 (kmovq on x86_64, two
; kmovd plus kunpckdq on i386) and emits vpacksswb with {%k1} {z}.
1060 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
1061 ; AVX512BW-LABEL: test_mask_packs_epi16_rrkz_512:
1062 ; AVX512BW: ## BB#0:
1063 ; AVX512BW-NEXT: kmovq %rdi, %k1
1064 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1065 ; AVX512BW-NEXT: retq
1067 ; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512:
1068 ; AVX512F-32: # BB#0:
1069 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1070 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1071 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1072 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1073 ; AVX512F-32-NEXT: retl
1074 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
; packsswb, register-memory, unmasked: %b is loaded from %ptr_b and the i64
; mask is all-ones (-1). The checks expect the load to appear as the memory
; operand of vpacksswb.
1078 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1079 ; AVX512BW-LABEL: test_mask_packs_epi16_rm_512:
1080 ; AVX512BW: ## BB#0:
1081 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0
1082 ; AVX512BW-NEXT: retq
1084 ; AVX512F-32-LABEL: test_mask_packs_epi16_rm_512:
1085 ; AVX512F-32: # BB#0:
1086 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1087 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0
1088 ; AVX512F-32-NEXT: retl
1089 %b = load <32 x i16>, <32 x i16>* %ptr_b
1090 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; packsswb, register-memory, merge-masked: %b is loaded from %ptr_b, with
; %passThru and a variable i64 %mask. Expected code builds k1 (kmovq from rsi
; on x86_64, two kmovd plus kunpckdq on i386), emits a memory-operand
; vpacksswb into zmm1 under {%k1}, then vmovaps to zmm0.
1094 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
1095 ; AVX512BW-LABEL: test_mask_packs_epi16_rmk_512:
1096 ; AVX512BW: ## BB#0:
1097 ; AVX512BW-NEXT: kmovq %rsi, %k1
1098 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1}
1099 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1100 ; AVX512BW-NEXT: retq
1102 ; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512:
1103 ; AVX512F-32: # BB#0:
1104 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1105 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1106 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1107 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1108 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1}
1109 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1110 ; AVX512F-32-NEXT: retl
1111 %b = load <32 x i16>, <32 x i16>* %ptr_b
1112 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; packsswb, register-memory, zero-masked: %b is loaded from %ptr_b, with a
; zeroinitializer pass-through and a variable i64 %mask. Expected code is a
; memory-operand vpacksswb carrying {%k1} {z}.
1116 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
1117 ; AVX512BW-LABEL: test_mask_packs_epi16_rmkz_512:
1118 ; AVX512BW: ## BB#0:
1119 ; AVX512BW-NEXT: kmovq %rsi, %k1
1120 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z}
1121 ; AVX512BW-NEXT: retq
1123 ; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512:
1124 ; AVX512F-32: # BB#0:
1125 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1126 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1127 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1128 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1129 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z}
1130 ; AVX512F-32-NEXT: retl
1131 %b = load <32 x i16>, <32 x i16>* %ptr_b
1132 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1136 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
; packusdw, register-register, unmasked: calls
; @llvm.x86.avx512.mask.packusdw.512 on %a and %b with a zeroinitializer
; pass-through and an all-ones mask (i32 -1). Both targets are expected to
; emit a plain vpackusdw with no mask register.
1139 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
1140 ; AVX512BW-LABEL: test_mask_packus_epi32_rr_512:
1141 ; AVX512BW: ## BB#0:
1142 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
1143 ; AVX512BW-NEXT: retq
1145 ; AVX512F-32-LABEL: test_mask_packus_epi32_rr_512:
1146 ; AVX512F-32: # BB#0:
1147 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
1148 ; AVX512F-32-NEXT: retl
1149 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packusdw, register-register, merge-masked: %passThru is the pass-through
; and %mask the variable i32 mask. Expected code moves the mask into k1,
; emits vpackusdw into zmm2 under {%k1}, then copies zmm2 to zmm0.
1153 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
1154 ; AVX512BW-LABEL: test_mask_packus_epi32_rrk_512:
1155 ; AVX512BW: ## BB#0:
1156 ; AVX512BW-NEXT: kmovd %edi, %k1
1157 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
1158 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1159 ; AVX512BW-NEXT: retq
1161 ; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512:
1162 ; AVX512F-32: # BB#0:
1163 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1164 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
1165 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1166 ; AVX512F-32-NEXT: retl
1167 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packusdw, register-register, zero-masked: zeroinitializer pass-through
; with the variable i32 %mask. Expected code is a vpackusdw carrying both
; {%k1} and {z}.
1171 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
1172 ; AVX512BW-LABEL: test_mask_packus_epi32_rrkz_512:
1173 ; AVX512BW: ## BB#0:
1174 ; AVX512BW-NEXT: kmovd %edi, %k1
1175 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
1176 ; AVX512BW-NEXT: retq
1178 ; AVX512F-32-LABEL: test_mask_packus_epi32_rrkz_512:
1179 ; AVX512F-32: # BB#0:
1180 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1181 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
1182 ; AVX512F-32-NEXT: retl
1183 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; packusdw, register-memory, unmasked: %b is loaded from %ptr_b and the mask
; is all-ones (i32 -1). The checks expect the load to appear as the memory
; operand of vpackusdw.
1187 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
1188 ; AVX512BW-LABEL: test_mask_packus_epi32_rm_512:
1189 ; AVX512BW: ## BB#0:
1190 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0
1191 ; AVX512BW-NEXT: retq
1193 ; AVX512F-32-LABEL: test_mask_packus_epi32_rm_512:
1194 ; AVX512F-32: # BB#0:
1195 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1196 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0
1197 ; AVX512F-32-NEXT: retl
1198 %b = load <16 x i32>, <16 x i32>* %ptr_b
1199 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packusdw, register-memory, merge-masked: %b is loaded from %ptr_b, with
; %passThru as pass-through and the variable i32 %mask. Expected code is a
; memory-operand vpackusdw into zmm1 under {%k1}, then vmovaps to zmm0.
1203 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1204 ; AVX512BW-LABEL: test_mask_packus_epi32_rmk_512:
1205 ; AVX512BW: ## BB#0:
1206 ; AVX512BW-NEXT: kmovd %esi, %k1
1207 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
1208 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1209 ; AVX512BW-NEXT: retq
1211 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512:
1212 ; AVX512F-32: # BB#0:
1213 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1214 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1215 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1}
1216 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1217 ; AVX512F-32-NEXT: retl
1218 %b = load <16 x i32>, <16 x i32>* %ptr_b
1219 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packusdw, register-memory, zero-masked: %b is loaded from %ptr_b, with a
; zeroinitializer pass-through and the variable i32 %mask. Expected code is a
; memory-operand vpackusdw carrying {%k1} {z}.
1223 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
1224 ; AVX512BW-LABEL: test_mask_packus_epi32_rmkz_512:
1225 ; AVX512BW: ## BB#0:
1226 ; AVX512BW-NEXT: kmovd %esi, %k1
1227 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
1228 ; AVX512BW-NEXT: retq
1230 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmkz_512:
1231 ; AVX512F-32: # BB#0:
1232 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1233 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1234 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z}
1235 ; AVX512F-32-NEXT: retl
1236 %b = load <16 x i32>, <16 x i32>* %ptr_b
1237 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; packusdw, broadcast-memory, unmasked: a scalar i32 loaded from %ptr_b is
; inserted into lane 0 and splatted with a zeroinitializer shuffle mask to
; form %b; the intrinsic mask is all-ones (i32 -1). The checks expect the
; splat to appear as a {1to16} memory operand of vpackusdw.
1241 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
1242 ; AVX512BW-LABEL: test_mask_packus_epi32_rmb_512:
1243 ; AVX512BW: ## BB#0:
1244 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
1245 ; AVX512BW-NEXT: retq
1247 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmb_512:
1248 ; AVX512F-32: # BB#0:
1249 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1250 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0
1251 ; AVX512F-32-NEXT: retl
1252 %q = load i32, i32* %ptr_b
1253 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1254 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1255 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; packusdw, broadcast-memory, merge-masked: %b is a splat of the i32 loaded
; from %ptr_b (insertelement + zeroinitializer shuffle); %passThru is the
; pass-through and %mask the variable i32 mask. Expected code is a {1to16}
; vpackusdw into zmm1 under {%k1}, then vmovaps to zmm0.
1259 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1260 ; AVX512BW-LABEL: test_mask_packus_epi32_rmbk_512:
1261 ; AVX512BW: ## BB#0:
1262 ; AVX512BW-NEXT: kmovd %esi, %k1
1263 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
1264 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1265 ; AVX512BW-NEXT: retq
1267 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512:
1268 ; AVX512F-32: # BB#0:
1269 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1270 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1271 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
1272 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1273 ; AVX512F-32-NEXT: retl
1274 %q = load i32, i32* %ptr_b
1275 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1276 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1277 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; packusdw, broadcast-memory, zero-masked: %b is a splat of the i32 loaded
; from %ptr_b; the pass-through is zeroinitializer and %mask is the variable
; i32 mask. Expected code is a {1to16} vpackusdw carrying {%k1} {z}.
1281 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
1282 ; AVX512BW-LABEL: test_mask_packus_epi32_rmbkz_512:
1283 ; AVX512BW: ## BB#0:
1284 ; AVX512BW-NEXT: kmovd %esi, %k1
1285 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
1286 ; AVX512BW-NEXT: retq
1288 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmbkz_512:
1289 ; AVX512F-32: # BB#0:
1290 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1291 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1292 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
1293 ; AVX512F-32-NEXT: retl
1294 %q = load i32, i32* %ptr_b
1295 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1296 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1297 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1301 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
; packuswb, register-register, unmasked: calls
; @llvm.x86.avx512.mask.packuswb.512 on %a and %b with a zeroinitializer
; pass-through and an all-ones i64 mask (-1). Both targets are expected to
; emit a plain vpackuswb with no mask register.
1303 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1304 ; AVX512BW-LABEL: test_mask_packus_epi16_rr_512:
1305 ; AVX512BW: ## BB#0:
1306 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
1307 ; AVX512BW-NEXT: retq
1309 ; AVX512F-32-LABEL: test_mask_packus_epi16_rr_512:
1310 ; AVX512F-32: # BB#0:
1311 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
1312 ; AVX512F-32-NEXT: retl
1313 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; packuswb, register-register, merge-masked with an i64 %mask and %passThru.
; x86_64 is expected to load the mask with one kmovq; i386 is expected to
; load two 32-bit halves with kmovd and join them with kunpckdq. The
; vpackuswb then writes zmm2 under {%k1} and vmovaps copies it to zmm0.
1317 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
1318 ; AVX512BW-LABEL: test_mask_packus_epi16_rrk_512:
1319 ; AVX512BW: ## BB#0:
1320 ; AVX512BW-NEXT: kmovq %rdi, %k1
1321 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
1322 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1323 ; AVX512BW-NEXT: retq
1325 ; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512:
1326 ; AVX512F-32: # BB#0:
1327 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1328 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1329 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1330 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
1331 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1332 ; AVX512F-32-NEXT: retl
1333 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; packuswb, register-register, zero-masked: zeroinitializer pass-through
; with a variable i64 %mask. Expected code builds k1 (kmovq on x86_64, two
; kmovd plus kunpckdq on i386) and emits vpackuswb with {%k1} {z}.
1337 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
1338 ; AVX512BW-LABEL: test_mask_packus_epi16_rrkz_512:
1339 ; AVX512BW: ## BB#0:
1340 ; AVX512BW-NEXT: kmovq %rdi, %k1
1341 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1342 ; AVX512BW-NEXT: retq
1344 ; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512:
1345 ; AVX512F-32: # BB#0:
1346 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1347 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1348 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1349 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
1350 ; AVX512F-32-NEXT: retl
1351 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
; packuswb, register-memory, unmasked: %b is loaded from %ptr_b and the i64
; mask is all-ones (-1). The checks expect the load to appear as the memory
; operand of vpackuswb.
1355 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1356 ; AVX512BW-LABEL: test_mask_packus_epi16_rm_512:
1357 ; AVX512BW: ## BB#0:
1358 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0
1359 ; AVX512BW-NEXT: retq
1361 ; AVX512F-32-LABEL: test_mask_packus_epi16_rm_512:
1362 ; AVX512F-32: # BB#0:
1363 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1364 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0
1365 ; AVX512F-32-NEXT: retl
1366 %b = load <32 x i16>, <32 x i16>* %ptr_b
1367 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; packuswb, register-memory, merge-masked: %b is loaded from %ptr_b, with
; %passThru and a variable i64 %mask. Expected code builds k1 (kmovq from rsi
; on x86_64, two kmovd plus kunpckdq on i386), emits a memory-operand
; vpackuswb into zmm1 under {%k1}, then vmovaps to zmm0.
1371 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
1372 ; AVX512BW-LABEL: test_mask_packus_epi16_rmk_512:
1373 ; AVX512BW: ## BB#0:
1374 ; AVX512BW-NEXT: kmovq %rsi, %k1
1375 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
1376 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1377 ; AVX512BW-NEXT: retq
1379 ; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512:
1380 ; AVX512F-32: # BB#0:
1381 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1382 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1383 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1384 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1385 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1}
1386 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1387 ; AVX512F-32-NEXT: retl
1388 %b = load <32 x i16>, <32 x i16>* %ptr_b
1389 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; packuswb, register-memory, zero-masked: %b is loaded from %ptr_b, with a
; zeroinitializer pass-through and a variable i64 %mask. Expected code is a
; memory-operand vpackuswb carrying {%k1} {z}.
1393 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
1394 ; AVX512BW-LABEL: test_mask_packus_epi16_rmkz_512:
1395 ; AVX512BW: ## BB#0:
1396 ; AVX512BW-NEXT: kmovq %rsi, %k1
1397 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
1398 ; AVX512BW-NEXT: retq
1400 ; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512:
1401 ; AVX512F-32: # BB#0:
1402 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1403 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1404 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1405 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1406 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z}
1407 ; AVX512F-32-NEXT: retl
1408 %b = load <32 x i16>, <32 x i16>* %ptr_b
1409 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1413 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
; vpaddsw, register-register, unmasked: calls
; @llvm.x86.avx512.mask.padds.w.512 on %a and %b with a zeroinitializer
; pass-through and an all-ones mask (i32 -1). Both targets are expected to
; emit a plain vpaddsw with no mask register.
1415 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1416 ; AVX512BW-LABEL: test_mask_adds_epi16_rr_512:
1417 ; AVX512BW: ## BB#0:
1418 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
1419 ; AVX512BW-NEXT: retq
1421 ; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512:
1422 ; AVX512F-32: # BB#0:
1423 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
1424 ; AVX512F-32-NEXT: retl
1425 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; vpaddsw, register-register, merge-masked: %passThru is the pass-through
; and %mask the variable i32 mask. Expected code moves the mask into k1,
; emits vpaddsw into zmm2 under {%k1}, then copies zmm2 to zmm0.
1429 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1430 ; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512:
1431 ; AVX512BW: ## BB#0:
1432 ; AVX512BW-NEXT: kmovd %edi, %k1
1433 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
1434 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1435 ; AVX512BW-NEXT: retq
1437 ; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:
1438 ; AVX512F-32: # BB#0:
1439 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1440 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
1441 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1442 ; AVX512F-32-NEXT: retl
1443 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; vpaddsw, register-register, zero-masked: zeroinitializer pass-through with
; the variable i32 %mask. Expected code is a vpaddsw carrying both {%k1}
; and {z}.
1447 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1448 ; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512:
1449 ; AVX512BW: ## BB#0:
1450 ; AVX512BW-NEXT: kmovd %edi, %k1
1451 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1452 ; AVX512BW-NEXT: retq
1454 ; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512:
1455 ; AVX512F-32: # BB#0:
1456 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1457 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1458 ; AVX512F-32-NEXT: retl
1459 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; vpaddsw, register-memory, unmasked: %b is loaded from %ptr_b and the mask
; is all-ones (i32 -1). The checks expect the load to appear as the memory
; operand of vpaddsw.
1463 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1464 ; AVX512BW-LABEL: test_mask_adds_epi16_rm_512:
1465 ; AVX512BW: ## BB#0:
1466 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0
1467 ; AVX512BW-NEXT: retq
1469 ; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512:
1470 ; AVX512F-32: # BB#0:
1471 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1472 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0
1473 ; AVX512F-32-NEXT: retl
1474 %b = load <32 x i16>, <32 x i16>* %ptr_b
1475 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; vpaddsw, register-memory, merge-masked: %b is loaded from %ptr_b, with
; %passThru as pass-through and the variable i32 %mask. Expected code is a
; memory-operand vpaddsw into zmm1 under {%k1}, then vmovaps to zmm0.
1479 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1480 ; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512:
1481 ; AVX512BW: ## BB#0:
1482 ; AVX512BW-NEXT: kmovd %esi, %k1
1483 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
1484 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1485 ; AVX512BW-NEXT: retq
1487 ; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512:
1488 ; AVX512F-32: # BB#0:
1489 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1490 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1491 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1}
1492 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1493 ; AVX512F-32-NEXT: retl
1494 %b = load <32 x i16>, <32 x i16>* %ptr_b
1495 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Loads %b from %ptr_b and calls padds.w.512 with a zeroinitializer third
; operand and %mask. The expected output is a zero-masked vpaddsw
; ({%k1} {z}) with a memory operand.
1499 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1500 ; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512:
1501 ; AVX512BW: ## BB#0:
1502 ; AVX512BW-NEXT: kmovd %esi, %k1
1503 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
1504 ; AVX512BW-NEXT: retq
1506 ; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512:
1507 ; AVX512F-32: # BB#0:
1508 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1509 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1510 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z}
1511 ; AVX512F-32-NEXT: retl
1512 %b = load <32 x i16>, <32 x i16>* %ptr_b
1513 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Declaration of the padds.w.512 intrinsic called by the
; test_mask_adds_epi16_*_512 tests: three <32 x i16> operands and an i32 mask.
1517 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls psubs.w.512 on two register operands with a zeroinitializer third
; operand and an all-ones mask (i32 -1). The expected output is a single
; unmasked vpsubsw.
1519 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1520 ; AVX512BW-LABEL: test_mask_subs_epi16_rr_512:
1521 ; AVX512BW: ## BB#0:
1522 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
1523 ; AVX512BW-NEXT: retq
1525 ; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512:
1526 ; AVX512F-32: # BB#0:
1527 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
1528 ; AVX512F-32-NEXT: retl
1529 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Calls psubs.w.512 on two register operands with %passThru and %mask.
; The expected output is a masked vpsubsw ({%k1}) writing zmm2, followed
; by a vmovaps of zmm2 into zmm0.
1533 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1534 ; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512:
1535 ; AVX512BW: ## BB#0:
1536 ; AVX512BW-NEXT: kmovd %edi, %k1
1537 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
1538 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1539 ; AVX512BW-NEXT: retq
1541 ; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512:
1542 ; AVX512F-32: # BB#0:
1543 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1544 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
1545 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1546 ; AVX512F-32-NEXT: retl
1547 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Calls psubs.w.512 on two register operands with a zeroinitializer third
; operand and %mask. The expected output is one zero-masked vpsubsw
; ({%k1} {z}).
1551 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1552 ; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512:
1553 ; AVX512BW: ## BB#0:
1554 ; AVX512BW-NEXT: kmovd %edi, %k1
1555 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1556 ; AVX512BW-NEXT: retq
1558 ; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512:
1559 ; AVX512F-32: # BB#0:
1560 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1561 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
1562 ; AVX512F-32-NEXT: retl
1563 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Loads %b from %ptr_b and calls psubs.w.512 with an all-ones mask (i32 -1).
; The expected output is an unmasked vpsubsw that takes the load as a
; memory operand.
1567 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1568 ; AVX512BW-LABEL: test_mask_subs_epi16_rm_512:
1569 ; AVX512BW: ## BB#0:
1570 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0
1571 ; AVX512BW-NEXT: retq
1573 ; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512:
1574 ; AVX512F-32: # BB#0:
1575 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1576 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0
1577 ; AVX512F-32-NEXT: retl
1578 %b = load <32 x i16>, <32 x i16>* %ptr_b
1579 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Loads %b from %ptr_b and calls psubs.w.512 with %passThru and %mask.
; The expected output is a vpsubsw with a memory operand whose masked
; result ({%k1}) lands in zmm1, followed by a vmovaps of zmm1 into zmm0.
1583 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1584 ; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512:
1585 ; AVX512BW: ## BB#0:
1586 ; AVX512BW-NEXT: kmovd %esi, %k1
1587 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
1588 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1589 ; AVX512BW-NEXT: retq
1591 ; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
1592 ; AVX512F-32: # BB#0:
1593 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1594 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1595 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1}
1596 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1597 ; AVX512F-32-NEXT: retl
1598 %b = load <32 x i16>, <32 x i16>* %ptr_b
1599 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Loads %b from %ptr_b and calls psubs.w.512 with a zeroinitializer third
; operand and %mask. The expected output is a zero-masked vpsubsw
; ({%k1} {z}) with a memory operand.
1603 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1604 ; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512:
1605 ; AVX512BW: ## BB#0:
1606 ; AVX512BW-NEXT: kmovd %esi, %k1
1607 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
1608 ; AVX512BW-NEXT: retq
1610 ; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512:
1611 ; AVX512F-32: # BB#0:
1612 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1613 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1614 ; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
1615 ; AVX512F-32-NEXT: retl
1616 %b = load <32 x i16>, <32 x i16>* %ptr_b
1617 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Declaration of the psubs.w.512 intrinsic called by the
; test_mask_subs_epi16_*_512 tests: three <32 x i16> operands and an i32 mask.
1621 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls paddus.w.512 on two register operands with a zeroinitializer third
; operand and an all-ones mask (i32 -1). The expected output is a single
; unmasked vpaddusw.
1623 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1624 ; AVX512BW-LABEL: test_mask_adds_epu16_rr_512:
1625 ; AVX512BW: ## BB#0:
1626 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
1627 ; AVX512BW-NEXT: retq
1629 ; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512:
1630 ; AVX512F-32: # BB#0:
1631 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
1632 ; AVX512F-32-NEXT: retl
1633 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Calls paddus.w.512 on two register operands with %passThru and %mask.
; The expected output is a masked vpaddusw ({%k1}) writing zmm2, followed
; by a vmovaps of zmm2 into zmm0.
1637 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1638 ; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512:
1639 ; AVX512BW: ## BB#0:
1640 ; AVX512BW-NEXT: kmovd %edi, %k1
1641 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
1642 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1643 ; AVX512BW-NEXT: retq
1645 ; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
1646 ; AVX512F-32: # BB#0:
1647 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1648 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
1649 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1650 ; AVX512F-32-NEXT: retl
1651 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Calls paddus.w.512 on two register operands with a zeroinitializer third
; operand and %mask. The expected output is one zero-masked vpaddusw
; ({%k1} {z}).
1655 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1656 ; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512:
1657 ; AVX512BW: ## BB#0:
1658 ; AVX512BW-NEXT: kmovd %edi, %k1
1659 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1660 ; AVX512BW-NEXT: retq
1662 ; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512:
1663 ; AVX512F-32: # BB#0:
1664 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1665 ; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1666 ; AVX512F-32-NEXT: retl
1667 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Loads %b from %ptr_b and calls paddus.w.512 with an all-ones mask (i32 -1).
; The expected output is an unmasked vpaddusw that takes the load as a
; memory operand.
1671 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1672 ; AVX512BW-LABEL: test_mask_adds_epu16_rm_512:
1673 ; AVX512BW: ## BB#0:
1674 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0
1675 ; AVX512BW-NEXT: retq
1677 ; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512:
1678 ; AVX512F-32: # BB#0:
1679 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1680 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0
1681 ; AVX512F-32-NEXT: retl
1682 %b = load <32 x i16>, <32 x i16>* %ptr_b
1683 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Loads %b from %ptr_b and calls paddus.w.512 with %passThru and %mask.
; The expected output is a vpaddusw with a memory operand whose masked
; result ({%k1}) lands in zmm1, followed by a vmovaps of zmm1 into zmm0.
1687 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1688 ; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512:
1689 ; AVX512BW: ## BB#0:
1690 ; AVX512BW-NEXT: kmovd %esi, %k1
1691 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
1692 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1693 ; AVX512BW-NEXT: retq
1695 ; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
1696 ; AVX512F-32: # BB#0:
1697 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1698 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1699 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1}
1700 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1701 ; AVX512F-32-NEXT: retl
1702 %b = load <32 x i16>, <32 x i16>* %ptr_b
1703 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Loads %b from %ptr_b and calls paddus.w.512 with a zeroinitializer third
; operand and %mask. The expected output is a zero-masked vpaddusw
; ({%k1} {z}) with a memory operand.
1707 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1708 ; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512:
1709 ; AVX512BW: ## BB#0:
1710 ; AVX512BW-NEXT: kmovd %esi, %k1
1711 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
1712 ; AVX512BW-NEXT: retq
1714 ; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512:
1715 ; AVX512F-32: # BB#0:
1716 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1717 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1718 ; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
1719 ; AVX512F-32-NEXT: retl
1720 %b = load <32 x i16>, <32 x i16>* %ptr_b
1721 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Declaration of the paddus.w.512 intrinsic called by the
; test_mask_adds_epu16_*_512 tests: three <32 x i16> operands and an i32 mask.
1725 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls psubus.w.512 on two register operands with a zeroinitializer third
; operand and an all-ones mask (i32 -1). The expected output is a single
; unmasked vpsubusw.
1727 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1728 ; AVX512BW-LABEL: test_mask_subs_epu16_rr_512:
1729 ; AVX512BW: ## BB#0:
1730 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
1731 ; AVX512BW-NEXT: retq
1733 ; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512:
1734 ; AVX512F-32: # BB#0:
1735 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
1736 ; AVX512F-32-NEXT: retl
1737 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Calls psubus.w.512 on two register operands with %passThru and %mask.
; The expected output is a masked vpsubusw ({%k1}) writing zmm2, followed
; by a vmovaps of zmm2 into zmm0.
1741 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1742 ; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512:
1743 ; AVX512BW: ## BB#0:
1744 ; AVX512BW-NEXT: kmovd %edi, %k1
1745 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
1746 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1747 ; AVX512BW-NEXT: retq
1749 ; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
1750 ; AVX512F-32: # BB#0:
1751 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1752 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
1753 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
1754 ; AVX512F-32-NEXT: retl
1755 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Calls psubus.w.512 on two register operands with a zeroinitializer third
; operand and %mask. The expected output is one zero-masked vpsubusw
; ({%k1} {z}).
1759 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1760 ; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512:
1761 ; AVX512BW: ## BB#0:
1762 ; AVX512BW-NEXT: kmovd %edi, %k1
1763 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1764 ; AVX512BW-NEXT: retq
1766 ; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512:
1767 ; AVX512F-32: # BB#0:
1768 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1769 ; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1770 ; AVX512F-32-NEXT: retl
1771 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Loads %b from %ptr_b and calls psubus.w.512 with an all-ones mask (i32 -1).
; The expected output is an unmasked vpsubusw that takes the load as a
; memory operand.
1775 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1776 ; AVX512BW-LABEL: test_mask_subs_epu16_rm_512:
1777 ; AVX512BW: ## BB#0:
1778 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0
1779 ; AVX512BW-NEXT: retq
1781 ; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512:
1782 ; AVX512F-32: # BB#0:
1783 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1784 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0
1785 ; AVX512F-32-NEXT: retl
1786 %b = load <32 x i16>, <32 x i16>* %ptr_b
1787 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Loads %b from %ptr_b and calls psubus.w.512 with %passThru and %mask.
; The expected output is a vpsubusw with a memory operand whose masked
; result ({%k1}) lands in zmm1, followed by a vmovaps of zmm1 into zmm0.
1791 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1792 ; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512:
1793 ; AVX512BW: ## BB#0:
1794 ; AVX512BW-NEXT: kmovd %esi, %k1
1795 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
1796 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1797 ; AVX512BW-NEXT: retq
1799 ; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
1800 ; AVX512F-32: # BB#0:
1801 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1802 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1803 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1}
1804 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
1805 ; AVX512F-32-NEXT: retl
1806 %b = load <32 x i16>, <32 x i16>* %ptr_b
1807 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Loads %b from %ptr_b and calls psubus.w.512 with a zeroinitializer third
; operand and %mask. The expected output is a zero-masked vpsubusw
; ({%k1} {z}) with a memory operand.
1811 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1812 ; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512:
1813 ; AVX512BW: ## BB#0:
1814 ; AVX512BW-NEXT: kmovd %esi, %k1
1815 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
1816 ; AVX512BW-NEXT: retq
1818 ; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512:
1819 ; AVX512F-32: # BB#0:
1820 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1821 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1822 ; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
1823 ; AVX512F-32-NEXT: retl
1824 %b = load <32 x i16>, <32 x i16>* %ptr_b
1825 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Declaration of the psubus.w.512 intrinsic called by the
; test_mask_subs_epu16_*_512 tests: three <32 x i16> operands and an i32 mask.
1829 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Declares pmaxs.b.512 and calls it twice on the same operands: once with
; the i64 mask %x3 and once with an all-ones mask (i64 -1). The two results
; are added. The expected output is a masked vpmaxsb ({%k1}), an unmasked
; vpmaxsb and a vpaddb. The i386 checks build the 64-bit mask from two
; kmovd stack loads combined with kunpckdq.
1831 declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1833 define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1834 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
1835 ; AVX512BW: ## BB#0:
1836 ; AVX512BW-NEXT: kmovq %rdi, %k1
1837 ; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
1838 ; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
1839 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1840 ; AVX512BW-NEXT: retq
1842 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
1843 ; AVX512F-32: # BB#0:
1844 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1845 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1846 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1847 ; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
1848 ; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
1849 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1850 ; AVX512F-32-NEXT: retl
1851 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1852 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1853 %res2 = add <64 x i8> %res, %res1
; Declares pmaxs.w.512 and calls it twice on the same operands: once with
; the i32 mask %x3 and once with an all-ones mask (i32 -1). The function
; returns the sum of the two results. The expected output is a masked
; vpmaxsw ({%k1}), an unmasked vpmaxsw and a vpaddw.
1857 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1859 define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1860 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
1861 ; AVX512BW: ## BB#0:
1862 ; AVX512BW-NEXT: kmovd %edi, %k1
1863 ; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
1864 ; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
1865 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1866 ; AVX512BW-NEXT: retq
1868 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
1869 ; AVX512F-32: # BB#0:
1870 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1871 ; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
1872 ; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
1873 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1874 ; AVX512F-32-NEXT: retl
1875 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1876 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1877 %res2 = add <32 x i16> %res, %res1
1878 ret <32 x i16> %res2
; Declares pmaxu.b.512 and calls it twice on the same operands: once with
; the i64 mask %x3 and once with an all-ones mask (i64 -1). The two results
; are added. The expected output is a masked vpmaxub ({%k1}), an unmasked
; vpmaxub and a vpaddb. The i386 checks build the 64-bit mask from two
; kmovd stack loads combined with kunpckdq.
1881 declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1883 define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1884 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
1885 ; AVX512BW: ## BB#0:
1886 ; AVX512BW-NEXT: kmovq %rdi, %k1
1887 ; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
1888 ; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
1889 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1890 ; AVX512BW-NEXT: retq
1892 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
1893 ; AVX512F-32: # BB#0:
1894 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1895 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1896 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1897 ; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
1898 ; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
1899 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1900 ; AVX512F-32-NEXT: retl
1901 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1902 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1903 %res2 = add <64 x i8> %res, %res1
; Declares pmaxu.w.512 and calls it twice on the same operands: once with
; the i32 mask %x3 and once with an all-ones mask (i32 -1). The function
; returns the sum of the two results. The expected output is a masked
; vpmaxuw ({%k1}), an unmasked vpmaxuw and a vpaddw.
1907 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1909 define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1910 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
1911 ; AVX512BW: ## BB#0:
1912 ; AVX512BW-NEXT: kmovd %edi, %k1
1913 ; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
1914 ; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
1915 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1916 ; AVX512BW-NEXT: retq
1918 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
1919 ; AVX512F-32: # BB#0:
1920 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1921 ; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
1922 ; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
1923 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1924 ; AVX512F-32-NEXT: retl
1925 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1926 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1927 %res2 = add <32 x i16> %res, %res1
1928 ret <32 x i16> %res2
; Declares pmins.b.512 and calls it twice on the same operands: once with
; the i64 mask %x3 and once with an all-ones mask (i64 -1). The two results
; are added. The expected output is a masked vpminsb ({%k1}), an unmasked
; vpminsb and a vpaddb. The i386 checks build the 64-bit mask from two
; kmovd stack loads combined with kunpckdq.
1931 declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1933 define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1934 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_b_512:
1935 ; AVX512BW: ## BB#0:
1936 ; AVX512BW-NEXT: kmovq %rdi, %k1
1937 ; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1}
1938 ; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0
1939 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1940 ; AVX512BW-NEXT: retq
1942 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_b_512:
1943 ; AVX512F-32: # BB#0:
1944 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1945 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1946 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1947 ; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1}
1948 ; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm0
1949 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1950 ; AVX512F-32-NEXT: retl
1951 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1952 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1953 %res2 = add <64 x i8> %res, %res1
; Declares pmins.w.512 and calls it twice on the same operands: once with
; the i32 mask %x3 and once with an all-ones mask (i32 -1). The function
; returns the sum of the two results. The expected output is a masked
; vpminsw ({%k1}), an unmasked vpminsw and a vpaddw.
1957 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1959 define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1960 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_w_512:
1961 ; AVX512BW: ## BB#0:
1962 ; AVX512BW-NEXT: kmovd %edi, %k1
1963 ; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1}
1964 ; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0
1965 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1966 ; AVX512BW-NEXT: retq
1968 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_w_512:
1969 ; AVX512F-32: # BB#0:
1970 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1971 ; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1}
1972 ; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm0
1973 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1974 ; AVX512F-32-NEXT: retl
1975 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1976 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1977 %res2 = add <32 x i16> %res, %res1
1978 ret <32 x i16> %res2
; Declares pminu.b.512 and calls it twice on the same operands: once with
; the i64 mask %x3 and once with an all-ones mask (i64 -1). The two results
; are added. The expected output is a masked vpminub ({%k1}), an unmasked
; vpminub and a vpaddb. The i386 checks build the 64-bit mask from two
; kmovd stack loads combined with kunpckdq.
1981 declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1983 define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1984 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_b_512:
1985 ; AVX512BW: ## BB#0:
1986 ; AVX512BW-NEXT: kmovq %rdi, %k1
1987 ; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1}
1988 ; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0
1989 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1990 ; AVX512BW-NEXT: retq
1992 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_b_512:
1993 ; AVX512F-32: # BB#0:
1994 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
1995 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
1996 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
1997 ; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1}
1998 ; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm0
1999 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
2000 ; AVX512F-32-NEXT: retl
2001 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2002 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2003 %res2 = add <64 x i8> %res, %res1
; Declares pminu.w.512 and calls it twice on the same operands: once with
; the i32 mask %x3 and once with an all-ones mask (i32 -1). The function
; returns the sum of the two results. The expected output is a masked
; vpminuw ({%k1}), an unmasked vpminuw and a vpaddw.
2007 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2009 define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2010 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_w_512:
2011 ; AVX512BW: ## BB#0:
2012 ; AVX512BW-NEXT: kmovd %edi, %k1
2013 ; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1}
2014 ; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0
2015 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2016 ; AVX512BW-NEXT: retq
2018 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_w_512:
2019 ; AVX512F-32: # BB#0:
2020 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2021 ; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1}
2022 ; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm0
2023 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2024 ; AVX512F-32-NEXT: retl
2025 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2026 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2027 %res2 = add <32 x i16> %res, %res1
2028 ret <32 x i16> %res2
; Declares mask.vpermt2var.hi.512 and calls it twice on the same operands:
; once with the i32 mask %x3 and once with an all-ones mask (i32 -1). The
; function returns the sum of the two results. The expected output copies
; zmm1 to zmm3 with vmovaps, runs a masked vpermt2w ({%k1}) into zmm3 and an
; unmasked vpermt2w into zmm1, then combines them with vpaddw.
2031 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2033 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2034 ; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
2035 ; AVX512BW: ## BB#0:
2036 ; AVX512BW-NEXT: kmovd %edi, %k1
2037 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
2038 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
2039 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
2040 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
2041 ; AVX512BW-NEXT: retq
2043 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
2044 ; AVX512F-32: # BB#0:
2045 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2046 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
2047 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
2048 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
2049 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
2050 ; AVX512F-32-NEXT: retl
2051 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2052 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2053 %res2 = add <32 x i16> %res, %res1
2054 ret <32 x i16> %res2
; Declares maskz.vpermt2var.hi.512 and calls it twice on the same operands:
; once with the i32 mask %x3 and once with an all-ones mask (i32 -1). The
; function returns the sum of the two results. The expected output copies
; zmm1 to zmm3 with vmovaps, runs a zero-masked vpermt2w ({%k1} {z}) into
; zmm3 and an unmasked vpermt2w into zmm1, then combines them with vpaddw.
2057 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2059 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2060 ; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
2061 ; AVX512BW: ## BB#0:
2062 ; AVX512BW-NEXT: kmovd %edi, %k1
2063 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
2064 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
2065 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
2066 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
2067 ; AVX512BW-NEXT: retq
2069 ; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
2070 ; AVX512F-32: # BB#0:
2071 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2072 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
2073 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
2074 ; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
2075 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
2076 ; AVX512F-32-NEXT: retl
2077 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2078 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2079 %res2 = add <32 x i16> %res, %res1
2080 ret <32 x i16> %res2
; Declares mask.vpermi2var.hi.512 and calls it twice on the same operands:
; once with the i32 mask %x3 and once with an all-ones mask (i32 -1). The
; function returns the sum of the two results. The expected output copies
; zmm1 to zmm3 with vmovaps, runs a masked vpermi2w ({%k1}) into zmm3 and an
; unmasked vpermi2w into zmm1, then combines them with vpaddw.
2083 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2085 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2086 ; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
2087 ; AVX512BW: ## BB#0:
2088 ; AVX512BW-NEXT: kmovd %edi, %k1
2089 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
2090 ; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
2091 ; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
2092 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
2093 ; AVX512BW-NEXT: retq
2095 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
2096 ; AVX512F-32: # BB#0:
2097 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2098 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
2099 ; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
2100 ; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
2101 ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
2102 ; AVX512F-32-NEXT: retl
2103 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2104 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2105 %res2 = add <32 x i16> %res, %res1
2106 ret <32 x i16> %res2
; Declares pavg.b.512 and calls it twice on the same operands: once with
; the i64 mask %x3 and once with an all-ones mask (i64 -1). The two results
; are added. The expected output is a masked vpavgb ({%k1}), an unmasked
; vpavgb and a vpaddb. The i386 checks build the 64-bit mask from two
; kmovd stack loads combined with kunpckdq.
2109 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
2111 define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
2112 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_b_512:
2113 ; AVX512BW: ## BB#0:
2114 ; AVX512BW-NEXT: kmovq %rdi, %k1
2115 ; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1}
2116 ; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm0
2117 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
2118 ; AVX512BW-NEXT: retq
2120 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_b_512:
2121 ; AVX512F-32: # BB#0:
2122 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2123 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2124 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2125 ; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1}
2126 ; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm0
2127 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
2128 ; AVX512F-32-NEXT: retl
2129 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2130 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2131 %res2 = add <64 x i8> %res, %res1
; Declares pavg.w.512 and calls it twice on the same operands: once with
; the i32 mask %x3 and once with an all-ones mask (i32 -1). The function
; returns the sum of the two results. The expected output is a masked
; vpavgw ({%k1}), an unmasked vpavgw and a vpaddw.
2135 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2137 define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2138 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_w_512:
2139 ; AVX512BW: ## BB#0:
2140 ; AVX512BW-NEXT: kmovd %edi, %k1
2141 ; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1}
2142 ; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm0
2143 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2144 ; AVX512BW-NEXT: retq
2146 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_w_512:
2147 ; AVX512F-32: # BB#0:
2148 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2149 ; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1}
2150 ; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm0
2151 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2152 ; AVX512F-32-NEXT: retl
2153 %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2154 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2155 %res2 = add <32 x i16> %res, %res1
2156 ret <32 x i16> %res2
; Declares pshuf.b.512 and calls it twice on the same operands: once with
; the i64 mask %x3 and once with an all-ones mask (i64 -1). The two results
; are added. The expected output is a masked vpshufb ({%k1}), an unmasked
; vpshufb and a vpaddb. The i386 checks build the 64-bit mask from two
; kmovd stack loads combined with kunpckdq.
2159 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
2161 define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
2162 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
2163 ; AVX512BW: ## BB#0:
2164 ; AVX512BW-NEXT: kmovq %rdi, %k1
2165 ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
2166 ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0
2167 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
2168 ; AVX512BW-NEXT: retq
2170 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
2171 ; AVX512F-32: # BB#0:
2172 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2173 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2174 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2175 ; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
2176 ; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0
2177 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
2178 ; AVX512F-32-NEXT: retl
2179 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2180 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2181 %res2 = add <64 x i8> %res, %res1
; Declares pabs.w.512 (two <32 x i16> operands plus an i32 mask) and calls
; it twice on %x0 and %x1: once with the mask %x2 and once with an all-ones
; mask (i32 -1). The function returns the sum of the two results. The
; expected output is a masked vpabsw of zmm0 into zmm1 ({%k1}), an unmasked
; vpabsw of zmm0 and a vpaddw.
2185 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
2187 define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2188 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_w_512:
2189 ; AVX512BW: ## BB#0:
2190 ; AVX512BW-NEXT: kmovd %edi, %k1
2191 ; AVX512BW-NEXT: vpabsw %zmm0, %zmm1 {%k1}
2192 ; AVX512BW-NEXT: vpabsw %zmm0, %zmm0
2193 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2194 ; AVX512BW-NEXT: retq
2196 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_w_512:
2197 ; AVX512F-32: # BB#0:
2198 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2199 ; AVX512F-32-NEXT: vpabsw %zmm0, %zmm1 {%k1}
2200 ; AVX512F-32-NEXT: vpabsw %zmm0, %zmm0
2201 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2202 ; AVX512F-32-NEXT: retl
2203 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2204 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
2205 %res2 = add <32 x i16> %res, %res1
2206 ret <32 x i16> %res2
; Byte-element counterpart of the pabs.w test: calls
; llvm.x86.avx512.mask.pabs.b.512 with the i64 mask %x2 and with -1, then adds.
; Expected code: vpabsb under {%k1}, an unmasked vpabsb, and vpaddb; the i386 run
; assembles the 64-bit mask from two kmovd stack loads joined by kunpckdq.
2209 declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
2211 define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
2212 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2213 ; AVX512BW: ## BB#0:
2214 ; AVX512BW-NEXT: kmovq %rdi, %k1
2215 ; AVX512BW-NEXT: vpabsb %zmm0, %zmm1 {%k1}
2216 ; AVX512BW-NEXT: vpabsb %zmm0, %zmm0
2217 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
2218 ; AVX512BW-NEXT: retq
2220 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2221 ; AVX512F-32: # BB#0:
2222 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2223 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2224 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2225 ; AVX512F-32-NEXT: vpabsb %zmm0, %zmm1 {%k1}
2226 ; AVX512F-32-NEXT: vpabsb %zmm0, %zmm0
2227 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
2228 ; AVX512F-32-NEXT: retl
2229 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2230 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
2231 %res2 = add <64 x i8> %res, %res1
; Calls llvm.x86.avx512.mask.pmulhu.w.512 with the i32 mask %x3 and with an
; all-ones mask (-1), passing %x2 as the third vector operand, and adds the results.
; Expected code: vpmulhuw into zmm2 under {%k1}, an unmasked vpmulhuw, and vpaddw.
2235 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2237 define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2238 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2239 ; AVX512BW: ## BB#0:
2240 ; AVX512BW-NEXT: kmovd %edi, %k1
2241 ; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
2242 ; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
2243 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2244 ; AVX512BW-NEXT: retq
2246 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2247 ; AVX512F-32: # BB#0:
2248 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2249 ; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
2250 ; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
2251 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2252 ; AVX512F-32-NEXT: retl
2253 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2254 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2255 %res2 = add <32 x i16> %res, %res1
2256 ret <32 x i16> %res2
; Calls llvm.x86.avx512.mask.pmulh.w.512 with the i32 mask %x3 and with an
; all-ones mask (-1), passing %x2 as the third vector operand, and adds the results.
; Expected code: vpmulhw into zmm2 under {%k1}, an unmasked vpmulhw, and vpaddw.
2259 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2261 define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2262 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2263 ; AVX512BW: ## BB#0:
2264 ; AVX512BW-NEXT: kmovd %edi, %k1
2265 ; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
2266 ; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
2267 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2268 ; AVX512BW-NEXT: retq
2270 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2271 ; AVX512F-32: # BB#0:
2272 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2273 ; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
2274 ; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
2275 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2276 ; AVX512F-32-NEXT: retl
2277 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2278 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2279 %res2 = add <32 x i16> %res, %res1
2280 ret <32 x i16> %res2
; Calls llvm.x86.avx512.mask.pmul.hr.sw.512 with the i32 mask %x3 and with an
; all-ones mask (-1), passing %x2 as the third vector operand, and adds the results.
; Expected code: vpmulhrsw into zmm2 under {%k1}, an unmasked vpmulhrsw, and vpaddw.
2283 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2285 define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2286 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2287 ; AVX512BW: ## BB#0:
2288 ; AVX512BW-NEXT: kmovd %edi, %k1
2289 ; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
2290 ; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
2291 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2292 ; AVX512BW-NEXT: retq
2294 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2295 ; AVX512F-32: # BB#0:
2296 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2297 ; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
2298 ; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
2299 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2300 ; AVX512F-32-NEXT: retl
2301 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2302 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2303 %res2 = add <32 x i16> %res, %res1
2304 ret <32 x i16> %res2
; Calls llvm.x86.avx512.mask.pmov.wb.512 three ways and sums the results:
;   %res0 - all-ones mask (-1) with %x1 as the second operand,
;   %res1 - mask %x2 with %x1 as the second operand,
;   %res2 - mask %x2 with zeroinitializer as the second operand.
; Expected code: vpmovwb in {%k1}, {%k1} {z}, and unmasked forms, then two vpaddb.
2307 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
2309 define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2310 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
2311 ; AVX512BW: ## BB#0:
2312 ; AVX512BW-NEXT: kmovd %edi, %k1
2313 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1 {%k1}
2314 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
2315 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
2316 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2317 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2318 ; AVX512BW-NEXT: retq
2320 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
2321 ; AVX512F-32: # BB#0:
2322 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2323 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm1 {%k1}
2324 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
2325 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm0
2326 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2327 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2328 ; AVX512F-32-NEXT: retl
2329 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2330 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2331 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
2332 %res3 = add <32 x i8> %res0, %res1
2333 %res4 = add <32 x i8> %res3, %res2
; Calls the store form llvm.x86.avx512.mask.pmov.wb.mem.512 twice through %ptr:
; first with an all-ones mask (-1), then with the mask %x2.
; Expected code: an unmasked vpmovwb store followed by a {%k1}-masked store to
; the same address.
2337 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2339 define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2340 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
2341 ; AVX512BW: ## BB#0:
2342 ; AVX512BW-NEXT: kmovd %esi, %k1
2343 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi)
2344 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
2345 ; AVX512BW-NEXT: retq
2347 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
2348 ; AVX512F-32: # BB#0:
2349 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2350 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2351 ; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax)
2352 ; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) {%k1}
2353 ; AVX512F-32-NEXT: retl
2354 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2355 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
; Calls llvm.x86.avx512.mask.pmovs.wb.512 three ways and sums the results:
;   %res0 - all-ones mask (-1) with %x1 as the second operand,
;   %res1 - mask %x2 with %x1 as the second operand,
;   %res2 - mask %x2 with zeroinitializer as the second operand.
; Expected code: vpmovswb in {%k1}, {%k1} {z}, and unmasked forms, then two vpaddb.
2359 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
2361 define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2362 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
2363 ; AVX512BW: ## BB#0:
2364 ; AVX512BW-NEXT: kmovd %edi, %k1
2365 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm1 {%k1}
2366 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
2367 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm0
2368 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2369 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2370 ; AVX512BW-NEXT: retq
2372 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
2373 ; AVX512F-32: # BB#0:
2374 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2375 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm1 {%k1}
2376 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
2377 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm0
2378 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2379 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2380 ; AVX512F-32-NEXT: retl
2381 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2382 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2383 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
2384 %res3 = add <32 x i8> %res0, %res1
2385 %res4 = add <32 x i8> %res3, %res2
; Calls the store form llvm.x86.avx512.mask.pmovs.wb.mem.512 twice through %ptr:
; first with an all-ones mask (-1), then with the mask %x2.
; Expected code: an unmasked vpmovswb store, then the mask move into k1, then a
; {%k1}-masked store to the same address.
2389 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2391 define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2392 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
2393 ; AVX512BW: ## BB#0:
2394 ; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi)
2395 ; AVX512BW-NEXT: kmovd %esi, %k1
2396 ; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) {%k1}
2397 ; AVX512BW-NEXT: retq
2399 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
2400 ; AVX512F-32: # BB#0:
2401 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2402 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
2403 ; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx)
2404 ; AVX512F-32-NEXT: kmovd %eax, %k1
2405 ; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) {%k1}
2406 ; AVX512F-32-NEXT: retl
2407 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2408 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
; Calls llvm.x86.avx512.mask.pmovus.wb.512 three ways and sums the results:
;   %res0 - all-ones mask (-1) with %x1 as the second operand,
;   %res1 - mask %x2 with %x1 as the second operand,
;   %res2 - mask %x2 with zeroinitializer as the second operand.
; Expected code: vpmovuswb in {%k1}, {%k1} {z}, and unmasked forms, then two vpaddb.
2412 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
2414 define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
2415 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
2416 ; AVX512BW: ## BB#0:
2417 ; AVX512BW-NEXT: kmovd %edi, %k1
2418 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm1 {%k1}
2419 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
2420 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
2421 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2422 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2423 ; AVX512BW-NEXT: retq
2425 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
2426 ; AVX512F-32: # BB#0:
2427 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2428 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm1 {%k1}
2429 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
2430 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm0
2431 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
2432 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
2433 ; AVX512F-32-NEXT: retl
2434 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
2435 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
2436 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
2437 %res3 = add <32 x i8> %res0, %res1
2438 %res4 = add <32 x i8> %res3, %res2
; Calls the store form llvm.x86.avx512.mask.pmovus.wb.mem.512 twice through %ptr:
; first with an all-ones mask (-1), then with the mask %x2.
; Expected code: an unmasked vpmovuswb store, then the mask move into k1, then a
; {%k1}-masked store to the same address.
2442 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
2444 define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
2445 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
2446 ; AVX512BW: ## BB#0:
2447 ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi)
2448 ; AVX512BW-NEXT: kmovd %esi, %k1
2449 ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1}
2450 ; AVX512BW-NEXT: retq
2452 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
2453 ; AVX512F-32: # BB#0:
2454 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
2455 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
2456 ; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx)
2457 ; AVX512F-32-NEXT: kmovd %eax, %k1
2458 ; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) {%k1}
2459 ; AVX512F-32-NEXT: retl
2460 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
2461 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
; Calls llvm.x86.avx512.mask.pmaddubs.w.512 (two <64 x i8> inputs, <32 x i16>
; result) with the i32 mask %x3 and with an all-ones mask (-1), then adds.
; Expected code: vpmaddubsw into zmm2 under {%k1}, an unmasked vpmaddubsw, and vpaddw.
2465 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
2467 define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
2468 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2469 ; AVX512BW: ## BB#0:
2470 ; AVX512BW-NEXT: kmovd %edi, %k1
2471 ; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
2472 ; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
2473 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2474 ; AVX512BW-NEXT: retq
2476 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2477 ; AVX512F-32: # BB#0:
2478 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2479 ; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
2480 ; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
2481 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2482 ; AVX512F-32-NEXT: retl
2483 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
2484 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
2485 %res2 = add <32 x i16> %res, %res1
2486 ret <32 x i16> %res2
; Calls llvm.x86.avx512.mask.pmaddw.d.512 (two <32 x i16> inputs, <16 x i32>
; result) with the i16 mask %x3 and with an all-ones mask (-1), then adds.
; The mask is 16 bits wide here, so the checks expect kmovw rather than kmovd.
; Expected code: vpmaddwd into zmm2 under {%k1}, an unmasked vpmaddwd, and vpaddd.
2489 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
2491 define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
2492 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2493 ; AVX512BW: ## BB#0:
2494 ; AVX512BW-NEXT: kmovw %edi, %k1
2495 ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
2496 ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
2497 ; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0
2498 ; AVX512BW-NEXT: retq
2500 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2501 ; AVX512F-32: # BB#0:
2502 ; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
2503 ; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
2504 ; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
2505 ; AVX512F-32-NEXT: vpaddd %zmm0, %zmm2, %zmm0
2506 ; AVX512F-32-NEXT: retl
2507 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
2508 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
2509 %res2 = add <16 x i32> %res, %res1
2510 ret <16 x i32> %res2
; Calls llvm.x86.avx512.mask.punpckhb.w.512 with the i64 mask %x3 and with an
; all-ones mask (-1), then adds the results. The checks match the shuffle-decode
; comment that follows the vpunpckhbw mnemonic (everything after `{{.*#+}}`).
; NOTE(review): for the masked instruction the decoded comment names k1 as the
; second shuffle source; this looks like a comment-printing quirk captured by the
; autogenerated checks rather than a real operand - confirm before relying on it.
2513 declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
2515 define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
2516 ; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
2517 ; AVX512BW: ## BB#0:
2518 ; AVX512BW-NEXT: kmovq %rdi, %k1
2519 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63]
2520 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
2521 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
2522 ; AVX512BW-NEXT: retq
2524 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
2525 ; AVX512F-32: # BB#0:
2526 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2527 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2528 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2529 ; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63]
2530 ; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
2531 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
2532 ; AVX512F-32-NEXT: retl
2533 %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2534 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2535 %res2 = add <64 x i8> %res, %res1
; Calls llvm.x86.avx512.mask.punpcklb.w.512 with the i64 mask %x3 and with an
; all-ones mask (-1), then adds the results. The checks match the shuffle-decode
; comment that follows the vpunpcklbw mnemonic (everything after `{{.*#+}}`).
; NOTE(review): for the masked instruction the decoded comment names k1 as the
; second shuffle source; this looks like a comment-printing quirk captured by the
; autogenerated checks rather than a real operand - confirm before relying on it.
2539 declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
2541 define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
2542 ; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
2543 ; AVX512BW: ## BB#0:
2544 ; AVX512BW-NEXT: kmovq %rdi, %k1
2545 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55]
2546 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
2547 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
2548 ; AVX512BW-NEXT: retq
2550 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
2551 ; AVX512F-32: # BB#0:
2552 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2553 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2554 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2555 ; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55]
2556 ; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
2557 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
2558 ; AVX512F-32-NEXT: retl
2559 %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2560 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2561 %res2 = add <64 x i8> %res, %res1
; Calls llvm.x86.avx512.mask.punpckhw.d.512 with the i32 mask %x3 and with an
; all-ones mask (-1), then adds the results. The checks match the shuffle-decode
; comment that follows the vpunpckhwd mnemonic (everything after `{{.*#+}}`).
; NOTE(review): for the masked instruction the decoded comment names k1 as the
; second shuffle source; this looks like a comment-printing quirk captured by the
; autogenerated checks rather than a real operand - confirm before relying on it.
2565 declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2567 define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2568 ; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
2569 ; AVX512BW: ## BB#0:
2570 ; AVX512BW-NEXT: kmovd %edi, %k1
2571 ; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31]
2572 ; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
2573 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2574 ; AVX512BW-NEXT: retq
2576 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
2577 ; AVX512F-32: # BB#0:
2578 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2579 ; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31]
2580 ; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
2581 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2582 ; AVX512F-32-NEXT: retl
2583 %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2584 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2585 %res2 = add <32 x i16> %res, %res1
2586 ret <32 x i16> %res2
; Calls llvm.x86.avx512.mask.punpcklw.d.512 with the i32 mask %x3 and with an
; all-ones mask (-1), then adds the results. The checks match the shuffle-decode
; comment that follows the vpunpcklwd mnemonic (everything after `{{.*#+}}`).
; NOTE(review): for the masked instruction the decoded comment names k1 as the
; second shuffle source; this looks like a comment-printing quirk captured by the
; autogenerated checks rather than a real operand - confirm before relying on it.
2589 declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2591 define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2592 ; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
2593 ; AVX512BW: ## BB#0:
2594 ; AVX512BW-NEXT: kmovd %edi, %k1
2595 ; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27]
2596 ; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
2597 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2598 ; AVX512BW-NEXT: retq
2600 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
2601 ; AVX512F-32: # BB#0:
2602 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2603 ; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27]
2604 ; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
2605 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
2606 ; AVX512F-32-NEXT: retl
2607 %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2608 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2609 %res2 = add <32 x i16> %res, %res1
2610 ret <32 x i16> %res2
; Calls llvm.x86.avx512.mask.palignr.512 with the constant i32 2 three ways and
; sums the results:
;   %res  - mask %x4 with %x3 as the fourth operand,
;   %res1 - mask %x4 with zeroinitializer as the fourth operand,
;   %res2 - all-ones mask (-1) with %x3 as the fourth operand.
; Expected code: vpalignr $2 in {%k1}, {%k1} {z}, and unmasked forms plus two
; vpaddb. The i386 run emits the unmasked vpalignr first and builds the 64-bit
; mask from two kmovd stack loads joined by kunpckdq.
2613 declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
2615 define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
2616 ; AVX512BW-LABEL: test_int_x86_avx512_mask_palignr_512:
2617 ; AVX512BW: ## BB#0:
2618 ; AVX512BW-NEXT: kmovq %rdi, %k1
2619 ; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
2620 ; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
2621 ; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0
2622 ; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm1
2623 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
2624 ; AVX512BW-NEXT: retq
2626 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_palignr_512:
2627 ; AVX512F-32: # BB#0:
2628 ; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3
2629 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2630 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2631 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
2632 ; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
2633 ; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z}
2634 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
2635 ; AVX512F-32-NEXT: vpaddb %zmm3, %zmm0, %zmm0
2636 ; AVX512F-32-NEXT: retl
2637 %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
2638 %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
2639 %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
2640 %res3 = add <64 x i8> %res, %res1
2641 %res4 = add <64 x i8> %res3, %res2
; Calls llvm.x86.avx512.mask.dbpsadbw.512 with the constant i32 2 three ways and
; sums the results:
;   %res  - mask %x4 with %x3 as the fourth operand,
;   %res1 - mask %x4 with zeroinitializer as the fourth operand,
;   %res2 - all-ones mask (-1) with %x3 as the fourth operand.
; Expected code: vdbpsadbw $2 in {%k1}, {%k1} {z}, and unmasked forms, then two vpaddw.
2645 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
2647 define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
2648 ; AVX512BW-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
2649 ; AVX512BW: ## BB#0:
2650 ; AVX512BW-NEXT: kmovd %edi, %k1
2651 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
2652 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
2653 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
2654 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2655 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2656 ; AVX512BW-NEXT: retq
2658 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
2659 ; AVX512F-32: # BB#0:
2660 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2661 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
2662 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
2663 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
2664 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
2665 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
2666 ; AVX512F-32-NEXT: retl
2667 %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
2668 %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
2669 %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
2670 %res3 = add <32 x i16> %res, %res1
2671 %res4 = add <32 x i16> %res3, %res2
2672 ret <32 x i16> %res4
; Calls llvm.x86.avx512.psll.dq.512 on %x0 with the constants 8 and 4 and adds
; the two results. Despite "mask" in the test name, the intrinsic used here takes
; no mask operand. Expected code: vpslldq $8, vpslldq $4, and vpaddq.
2675 declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
2677 define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) {
2678 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_dq_512:
2679 ; AVX512BW: ## BB#0:
2680 ; AVX512BW-NEXT: vpslldq $8, %zmm0, %zmm1
2681 ; AVX512BW-NEXT: vpslldq $4, %zmm0, %zmm0
2682 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
2683 ; AVX512BW-NEXT: retq
2685 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_dq_512:
2686 ; AVX512F-32: # BB#0:
2687 ; AVX512F-32-NEXT: vpslldq $8, %zmm0, %zmm1
2688 ; AVX512F-32-NEXT: vpslldq $4, %zmm0, %zmm0
2689 ; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
2690 ; AVX512F-32-NEXT: retl
2691 %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
2692 %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
2693 %res2 = add <8 x i64> %res, %res1
; Calls llvm.x86.avx512.psrl.dq.512 on %x0 with the constants 8 and 4 and adds
; the two results. Despite "mask" in the test name, the intrinsic used here takes
; no mask operand. Expected code: vpsrldq $8, vpsrldq $4, and vpaddq.
2697 declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
2699 define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) {
2700 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_dq_512:
2701 ; AVX512BW: ## BB#0:
2702 ; AVX512BW-NEXT: vpsrldq $8, %zmm0, %zmm1
2703 ; AVX512BW-NEXT: vpsrldq $4, %zmm0, %zmm0
2704 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
2705 ; AVX512BW-NEXT: retq
2707 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_dq_512:
2708 ; AVX512F-32: # BB#0:
2709 ; AVX512F-32-NEXT: vpsrldq $8, %zmm0, %zmm1
2710 ; AVX512F-32-NEXT: vpsrldq $4, %zmm0, %zmm0
2711 ; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
2712 ; AVX512F-32-NEXT: retl
2713 %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
2714 %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
2715 %res2 = add <8 x i64> %res, %res1
; Calls llvm.x86.avx512.psad.bw.512 twice, on (%x0, %x1) and on (%x0, %x2), and
; adds the two <8 x i64> results. Despite "mask" in the test name, the intrinsic
; used here takes no mask operand. Expected code: two vpsadbw and one vpaddq.
2718 declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
2720 define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
2721 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psadb_w_512:
2722 ; AVX512BW: ## BB#0:
2723 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
2724 ; AVX512BW-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
2725 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
2726 ; AVX512BW-NEXT: retq
2728 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512:
2729 ; AVX512F-32: # BB#0:
2730 ; AVX512F-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
2731 ; AVX512F-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
2732 ; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
2733 ; AVX512F-32-NEXT: retl
2734 %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
2735 %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
2736 %res2 = add <8 x i64> %res, %res1
; Calls llvm.x86.avx512.kunpck.wd once on the two i32 arguments.
; Expected code: both arguments are moved into mask registers with kmovd,
; combined by kunpckwd %k1, %k0, %k0, and the result is moved back to eax.
2740 declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
2742 define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
2743 ; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
2744 ; AVX512BW: ## BB#0:
2745 ; AVX512BW-NEXT: kmovd %edi, %k0
2746 ; AVX512BW-NEXT: kmovd %esi, %k1
2747 ; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0
2748 ; AVX512BW-NEXT: kmovd %k0, %eax
2749 ; AVX512BW-NEXT: retq
2751 ; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
2752 ; AVX512F-32: # BB#0:
2753 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2754 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2755 ; AVX512F-32-NEXT: kunpckwd %k1, %k0, %k0
2756 ; AVX512F-32-NEXT: kmovd %k0, %eax
2757 ; AVX512F-32-NEXT: retl
2758 %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
2762 declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
2764 define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
2765 ; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
2766 ; AVX512BW: ## BB#0:
2767 ; AVX512BW-NEXT: kmovq %rdi, %k0
2768 ; AVX512BW-NEXT: kmovq %rsi, %k1
2769 ; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0
2770 ; AVX512BW-NEXT: kmovq %k0, %rax
2771 ; AVX512BW-NEXT: retq
2773 ; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
2774 ; AVX512F-32: # BB#0:
2775 ; AVX512F-32-NEXT: subl $12, %esp
2776 ; AVX512F-32-NEXT: .Ltmp8:
2777 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
2778 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
2779 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
2780 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
2781 ; AVX512F-32-NEXT: kmovq %k0, (%esp)
2782 ; AVX512F-32-NEXT: movl (%esp), %eax
2783 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
2784 ; AVX512F-32-NEXT: addl $12, %esp
2785 ; AVX512F-32-NEXT: retl
2786 %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)