1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
3 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; mask16: reinterpret the i16 argument as a <16 x i1> mask, invert every lane
; by XOR-ing with an all-ones vector, and hand the result back as an i16.
; The assertions use the prefix shared by both run lines and expect the
; value to be moved into a k-register, inverted with knotw, and moved back.
5 define i16 @mask16(i16 %x) {
8 ; CHECK-NEXT: kmovw %edi, %k0
9 ; CHECK-NEXT: knotw %k0, %k0
10 ; CHECK-NEXT: kmovw %k0, %eax
12 %m0 = bitcast i16 %x to <16 x i1>
13 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
14 %ret = bitcast <16 x i1> %m1 to i16
; mask8: 8-lane variant of the mask inversion. The i8 argument is bitcast to
; <8 x i1>, XOR-ed with all-ones, and bitcast back to i8.
; The two run lines are expected to differ: the KNL assertions zero-extend the
; argument (movzbl) and use the 16-bit kmovw/knotw forms, while the SKX
; assertions use the byte-sized kmovb/knotb forms.
18 define i8 @mask8(i8 %x) {
21 ; KNL-NEXT: movzbl %dil, %eax
22 ; KNL-NEXT: kmovw %eax, %k0
23 ; KNL-NEXT: knotw %k0, %k0
24 ; KNL-NEXT: kmovw %k0, %eax
29 ; SKX-NEXT: kmovb %edi, %k0
30 ; SKX-NEXT: knotb %k0, %k0
31 ; SKX-NEXT: kmovb %k0, %eax
33 %m0 = bitcast i8 %x to <8 x i1>
34 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
35 %ret = bitcast <8 x i1> %m1 to i8
; mask16_mem: load an i16 through %ptr, invert it as a <16 x i1> mask
; (XOR with all-ones), and store the result back through the same pointer.
; The shared assertions expect the k-register to be loaded from and stored
; to (%rdi) directly, with a single knotw in between.
39 define void @mask16_mem(i16* %ptr) {
40 ; CHECK-LABEL: mask16_mem:
42 ; CHECK-NEXT: kmovw (%rdi), %k0
43 ; CHECK-NEXT: knotw %k0, %k0
44 ; CHECK-NEXT: kmovw %k0, (%rdi)
46 %x = load i16, i16* %ptr, align 4
47 %m0 = bitcast i16 %x to <16 x i1>
48 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
49 %ret = bitcast <16 x i1> %m1 to i16
50 store i16 %ret, i16* %ptr, align 4
; mask8_mem: load an i8 through %ptr, invert it as an <8 x i1> mask
; (XOR with all-ones), and store the result back through the same pointer.
; The KNL assertions expect the 16-bit kmovw/knotw forms on the memory
; operand; the SKX assertions expect the byte-sized kmovb/knotb forms.
54 define void @mask8_mem(i8* %ptr) {
55 ; KNL-LABEL: mask8_mem:
57 ; KNL-NEXT: kmovw (%rdi), %k0
58 ; KNL-NEXT: knotw %k0, %k0
59 ; KNL-NEXT: kmovw %k0, (%rdi)
62 ; SKX-LABEL: mask8_mem:
64 ; SKX-NEXT: kmovb (%rdi), %k0
65 ; SKX-NEXT: knotb %k0, %k0
66 ; SKX-NEXT: kmovb %k0, (%rdi)
68 %x = load i8, i8* %ptr, align 4
69 %m0 = bitcast i8 %x to <8 x i1>
70 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
71 %ret = bitcast <8 x i1> %m1 to i8
72 store i8 %ret, i8* %ptr, align 4
; mand16: combine two i16 values as <16 x i1> masks, computing
; (x AND y) OR (x XOR y), and return the result as i16.
; The shared assertions expect one k-register instruction per IR operation
; (kandw, kxorw, korw) with both inputs moved into k0/k1 first.
76 define i16 @mand16(i16 %x, i16 %y) {
77 ; CHECK-LABEL: mand16:
79 ; CHECK-NEXT: kmovw %edi, %k0
80 ; CHECK-NEXT: kmovw %esi, %k1
81 ; CHECK-NEXT: kandw %k1, %k0, %k2
82 ; CHECK-NEXT: kxorw %k1, %k0, %k0
83 ; CHECK-NEXT: korw %k0, %k2, %k0
84 ; CHECK-NEXT: kmovw %k0, %eax
86 %ma = bitcast i16 %x to <16 x i1>
87 %mb = bitcast i16 %y to <16 x i1>
88 %mc = and <16 x i1> %ma, %mb
89 %md = xor <16 x i1> %ma, %mb
90 %me = or <16 x i1> %mc, %md
91 %ret = bitcast <16 x i1> %me to i16
; shuf_test1: take the upper half of a 16-lane mask. The i16 argument is
; bitcast to <16 x i1>, lanes 8..15 are selected with a shufflevector, and
; the resulting <8 x i1> is bitcast to i8.
; Both sets of assertions expect the shuffle to become a single
; kshiftrw by 8; they differ only in the final move out of the k-register
; (kmovw for KNL, kmovb for SKX).
95 define i8 @shuf_test1(i16 %v) nounwind {
96 ; KNL-LABEL: shuf_test1:
98 ; KNL-NEXT: kmovw %edi, %k0
99 ; KNL-NEXT: kshiftrw $8, %k0, %k0
100 ; KNL-NEXT: kmovw %k0, %eax
103 ; SKX-LABEL: shuf_test1:
105 ; SKX-NEXT: kmovw %edi, %k0
106 ; SKX-NEXT: kshiftrw $8, %k0, %k0
107 ; SKX-NEXT: kmovb %k0, %eax
109 %v1 = bitcast i16 %v to <16 x i1>
110 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
111 %mask1 = bitcast <8 x i1> %mask to i8
; zext_test1 / zext_test2 / zext_test3: compare two <16 x i32> vectors with
; unsigned greater-than, extract lane 5 of the <16 x i1> result, and
; zero-extend that single bit to i32, i16 and i8 respectively.
; The zext_test1 assertions isolate bit 5 of the 16-bit mask with a shift
; pair: kshiftlw by 10 moves bit 5 to bit 15, kshiftrw by 15 brings it down
; to bit 0, and the final andl keeps only that bit.
; No assertions for zext_test2 or zext_test3 are visible in this excerpt.
115 define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
116 ; CHECK-LABEL: zext_test1:
118 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
119 ; CHECK-NEXT: kshiftlw $10, %k0, %k0
120 ; CHECK-NEXT: kshiftrw $15, %k0, %k0
121 ; CHECK-NEXT: kmovw %k0, %eax
122 ; CHECK-NEXT: andl $1, %eax
124 %cmp_res = icmp ugt <16 x i32> %a, %b
125 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
126 %res = zext i1 %cmp_res.i1 to i32
128 }define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
129 %cmp_res = icmp ugt <16 x i32> %a, %b
130 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
131 %res = zext i1 %cmp_res.i1 to i16
133 }define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
134 %cmp_res = icmp ugt <16 x i32> %a, %b
135 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
136 %res = zext i1 %cmp_res.i1 to i8
; conv1: exercise constant <8 x i1> stores and a reload through the stack.
; An all-true <8 x i1> is stored through %R; a second constant with lane 0
; false and lanes 1..7 true is stored to a local alloca, loaded back, and
; bitcast to i8.
; Both sets of assertions expect the second constant to appear as the
; immediate -2 (0xFE, i.e. only bit 0 clear), written to a stack slot and
; also placed in %al. The all-ones store is expected to come from kxnorw;
; KNL goes through %eax/%al to memory, SKX stores the k-register with kmovb.
140 define i8 @conv1(<8 x i1>* %R) {
142 ; KNL: ## BB#0: ## %entry
143 ; KNL-NEXT: kxnorw %k0, %k0, %k0
144 ; KNL-NEXT: kmovw %k0, %eax
145 ; KNL-NEXT: movb %al, (%rdi)
146 ; KNL-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
147 ; KNL-NEXT: movb $-2, %al
151 ; SKX: ## BB#0: ## %entry
152 ; SKX-NEXT: kxnorw %k0, %k0, %k0
153 ; SKX-NEXT: kmovb %k0, (%rdi)
154 ; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
155 ; SKX-NEXT: movb $-2, %al
158 store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
160 %maskPtr = alloca <8 x i1>
161 store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
162 %mask = load <8 x i1>, <8 x i1>* %maskPtr
163 %mask_convert = bitcast <8 x i1> %mask to i8
; test4: compare-of-compares on 4-lane masks. Two signed greater-than
; compares on <4 x i64> produce <4 x i1> values, which are themselves compared
; with icmp sgt; the <4 x i1> result is sign-extended to <4 x i32>.
; The KNL assertions materialise each compare result as a sign-filled vector
; (vpmovqd, then a shift-left/arithmetic-shift-right by 31) and finish with
; vpcmpgtd. The SKX assertions stay in k-registers: one compare result is
; inverted with knotw and used as the write-mask of the other vpcmpgtq.
167 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
170 ; KNL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
171 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
172 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0
173 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
174 ; KNL-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
175 ; KNL-NEXT: vpmovqd %zmm1, %ymm1
176 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1
177 ; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
178 ; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
183 ; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0
184 ; SKX-NEXT: knotw %k0, %k1
185 ; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
186 ; SKX-NEXT: vpmovm2d %k0, %xmm0
188 %x_gt_y = icmp sgt <4 x i64> %x, %y
189 %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
190 %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
191 %resse = sext <4 x i1>%res to <4 x i32>
; test5: 2-lane compare-of-compares. %x < %y (signed) and %x1 > %y1 (signed)
; on <2 x i64> give two <2 x i1> values, which are compared with icmp slt; the
; result is sign-extended to <2 x i64>. Note that the first value is named
; %x_gt_y although the predicate used is slt.
; The KNL assertions expect three vpcmpgtq instructions on xmm registers; the
; SKX assertions expect a knotw-inverted mask used as the write-mask of the
; second vpcmpgtq, followed by vpmovm2q.
;
; test6: AND a <16 x i1> argument with an alternating true/false constant
; (true in the even lanes), bitcast the result to i16, and branch on whether
; it equals zero. No assertions for test6 are visible in this excerpt.
195 define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
198 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
199 ; KNL-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
200 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
205 ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
206 ; SKX-NEXT: knotw %k0, %k1
207 ; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
208 ; SKX-NEXT: vpmovm2q %k0, %xmm0
210 %x_gt_y = icmp slt <2 x i64> %x, %y
211 %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
212 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
213 %resse = sext <2 x i1>%res to <2 x i64>
215 }define void @test6(<16 x i1> %mask) {
217 %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
218 %b = bitcast <16 x i1> %a to i16
219 %c = icmp eq i16 %b, 0
220 br i1 %c, label %true, label %false
; test7: OR an <8 x i1> argument with an alternating true/false constant
; (true in the even lanes), bitcast the result to i8, and branch on whether
; it equals zero.
; In both sets of assertions the constant shows up as the immediate 85
; (0x55, bits 0/2/4/6 set). KNL first widens the argument to a zmm register
; and tests it into a k-register, then uses the 16-bit korw; SKX converts
; with vpsllw/vpmovw2m and uses the byte-sized korb. Both end with testb.
228 define void @test7(<8 x i1> %mask) {
230 ; KNL: ## BB#0: ## %allocas
231 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
232 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
233 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
234 ; KNL-NEXT: movb $85, %al
235 ; KNL-NEXT: movzbl %al, %eax
236 ; KNL-NEXT: kmovw %eax, %k1
237 ; KNL-NEXT: korw %k1, %k0, %k0
238 ; KNL-NEXT: kmovw %k0, %eax
239 ; KNL-NEXT: testb %al, %al
243 ; SKX: ## BB#0: ## %allocas
244 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
245 ; SKX-NEXT: vpmovw2m %xmm0, %k0
246 ; SKX-NEXT: movb $85, %al
247 ; SKX-NEXT: kmovb %eax, %k1
248 ; SKX-NEXT: korb %k1, %k0, %k0
249 ; SKX-NEXT: kmovb %k0, %eax
250 ; SKX-NEXT: testb %al, %al
253 %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
254 %b = bitcast <8 x i1> %a to i8
255 %c = icmp eq i8 %b, 0
256 br i1 %c, label %true, label %false
; test8: scalar-controlled select between two vector compare results.
; %cond is the signed compare %a1 > %b1. When it is true the result is
; (%a > 0, signed) per lane; otherwise it is (%b < 0, unsigned) per lane.
; The chosen <16 x i1> is sign-extended to <16 x i8>.
; Both sets of assertions expect the select to be lowered as a branch
; (cmpl + jg) around two separate vector compares against a zeroed zmm2.
; KNL converts the mask with a masked zeroing vpbroadcastd plus vpmovdb;
; SKX uses vpmovm2b.
264 define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
267 ; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
268 ; KNL-NEXT: cmpl %esi, %edi
269 ; KNL-NEXT: jg LBB14_1
271 ; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1
272 ; KNL-NEXT: jmp LBB14_3
274 ; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
276 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
277 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
282 ; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
283 ; SKX-NEXT: cmpl %esi, %edi
284 ; SKX-NEXT: jg LBB14_1
286 ; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0
287 ; SKX-NEXT: vpmovm2b %k0, %xmm0
290 ; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
291 ; SKX-NEXT: vpmovm2b %k0, %xmm0
293 %cond = icmp sgt i32 %a1, %b1
294 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
295 %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
296 %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
297 %res = sext <16 x i1> %mix to <16 x i8>
; test9: choose between two <16 x i1> arguments with a scalar condition
; (%a1 > %b1, signed): %a when true, %b when false.
; Both sets of assertions expect a branch (cmpl + jg) that picks which xmm
; argument to convert, followed by a round trip through a k-register:
; KNL via vpmovsxbd/vpslld/vptestmd and back with vpbroadcastd + vpmovdb,
; SKX via vpsllw/vpmovb2m and back with vpmovm2b.
;
; test10: the same select with <8 x i1> operands. No assertions for test10
; are visible in this excerpt.
300 define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
303 ; KNL-NEXT: cmpl %esi, %edi
304 ; KNL-NEXT: jg LBB15_1
306 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
307 ; KNL-NEXT: jmp LBB15_3
309 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
311 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
312 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
313 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
314 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
319 ; SKX-NEXT: cmpl %esi, %edi
320 ; SKX-NEXT: jg LBB15_1
322 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
323 ; SKX-NEXT: jmp LBB15_3
325 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
327 ; SKX-NEXT: vpmovb2m %xmm0, %k0
328 ; SKX-NEXT: vpmovm2b %k0, %xmm0
330 %mask = icmp sgt i32 %a1, %b1
331 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
333 }define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
334 %mask = icmp sgt i32 %a1, %b1
335 %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
; test11: choose between two <4 x i1> arguments with a scalar condition
; (%a1 > %b1, signed): %a when true, %b when false.
; The KNL assertions expect a plain branch around a register copy of the
; second argument (vmovaps), with no k-register involved. The SKX assertions
; expect a branch that picks which argument to shift (vpslld by 31), then a
; round trip through a k-register with vpmovd2m / vpmovm2d.
339 define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
342 ; KNL-NEXT: cmpl %esi, %edi
343 ; KNL-NEXT: jg LBB17_2
345 ; KNL-NEXT: vmovaps %zmm1, %zmm0
351 ; SKX-NEXT: cmpl %esi, %edi
352 ; SKX-NEXT: jg LBB17_1
354 ; SKX-NEXT: vpslld $31, %xmm1, %xmm0
355 ; SKX-NEXT: jmp LBB17_3
357 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
359 ; SKX-NEXT: vpmovd2m %xmm0, %k0
360 ; SKX-NEXT: vpmovm2d %k0, %xmm0
362 %mask = icmp sgt i32 %a1, %b1
363 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
; test12: constant-fold an extract from a constant mask. The constant 21845
; (0x5555, every even bit set) is bitcast to <16 x i1>; lane 0 is extracted
; and used to select between %x and %y.
; Lane 0 is set, so the shared assertions expect the whole function to reduce
; to returning the first argument (movl %edi, %eax).
367 define i32 @test12(i32 %x, i32 %y) {
368 ; CHECK-LABEL: test12:
370 ; CHECK-NEXT: movl %edi, %eax
372 %a = bitcast i16 21845 to <16 x i1>
373 %b = extractelement <16 x i1> %a, i32 0
374 %c = select i1 %b, i32 %x, i32 %y
; test13: counterpart of the lane-0 constant-extract test, reading lane 3 of
; the same constant 21845 (0x5555). Bit 3 is clear, so the shared assertions
; expect the function to reduce to returning the second argument
; (movl %esi, %eax).
;
; test14: extract lane 2 of the constant 21845 bitcast to <16 x i1> and
; insert it at index 1 of the constant <true, false, false, true>.
; No assertions for test14 are visible in this excerpt.
378 define i32 @test13(i32 %x, i32 %y) {
379 ; CHECK-LABEL: test13:
381 ; CHECK-NEXT: movl %esi, %eax
383 %a = bitcast i16 21845 to <16 x i1>
384 %b = extractelement <16 x i1> %a, i32 3
385 %c = select i1 %b, i32 %x, i32 %y
387 }define <4 x i1> @test14() {
388 %a = bitcast i16 21845 to <16 x i1>
389 %b = extractelement <16 x i1> %a, i32 2
390 %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
; test15: select between two constant masks. The i16 constants 21845 (0x5555)
; and 1 are bitcast to <16 x i1>; the signed compare %x > %y picks the first
; when true and the second when false.
; Both sets of assertions expect the select to be done on scalar 16-bit
; values with cmovgw, then moved into a k-register once. KNL expands the mask
; with a masked zeroing vpbroadcastd plus vpmovdb; SKX uses vpmovm2b.
394 define <16 x i1> @test15(i32 %x, i32 %y) {
397 ; KNL-NEXT: cmpl %esi, %edi
398 ; KNL-NEXT: movw $21845, %ax ## imm = 0x5555
399 ; KNL-NEXT: movw $1, %cx
400 ; KNL-NEXT: cmovgw %ax, %cx
401 ; KNL-NEXT: kmovw %ecx, %k1
402 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
403 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
408 ; SKX-NEXT: cmpl %esi, %edi
409 ; SKX-NEXT: movw $21845, %ax ## imm = 0x5555
410 ; SKX-NEXT: movw $1, %cx
411 ; SKX-NEXT: cmovgw %ax, %cx
412 ; SKX-NEXT: kmovw %ecx, %k0
413 ; SKX-NEXT: vpmovm2b %k0, %xmm0
415 %a = bitcast i16 21845 to <16 x i1>
416 %b = bitcast i16 1 to <16 x i1>
417 %mask = icmp sgt i32 %x, %y
418 %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
; test16: insert a constant into a 64-lane mask. The i64 argument is bitcast
; to <64 x i1>, lane 5 is overwritten with true, and the result is
; sign-extended to <64 x i8>.
; The KNL assertions are long because the mask is taken apart bit by bit:
; the low 32 bits are pulled out of %edi with bextrl, the high 32 bits are
; spilled to the stack, reloaded into %r15d and tested one bit at a time with
; shift/and/branch sequences, and every bit is placed with vpinsrb. Lane 5 is
; set by inserting the constant 1 at byte index 5.
; The SKX assertions keep everything in k-registers: a single set bit is
; built with kxnorw + kshiftrw by 15, moved to position 5 with kshiftlq,
; OR-ed into the input with korq, and expanded with vpmovm2b.
422 define <64 x i8> @test16(i64 %x) {
425 ; KNL-NEXT: pushq %rbp
427 ; KNL-NEXT: .cfi_def_cfa_offset 16
429 ; KNL-NEXT: .cfi_offset %rbp, -16
430 ; KNL-NEXT: movq %rsp, %rbp
432 ; KNL-NEXT: .cfi_def_cfa_register %rbp
433 ; KNL-NEXT: pushq %r15
434 ; KNL-NEXT: pushq %r14
435 ; KNL-NEXT: pushq %r13
436 ; KNL-NEXT: pushq %r12
437 ; KNL-NEXT: pushq %rbx
438 ; KNL-NEXT: andq $-32, %rsp
439 ; KNL-NEXT: subq $128, %rsp
441 ; KNL-NEXT: .cfi_offset %rbx, -56
443 ; KNL-NEXT: .cfi_offset %r12, -48
445 ; KNL-NEXT: .cfi_offset %r13, -40
447 ; KNL-NEXT: .cfi_offset %r14, -32
449 ; KNL-NEXT: .cfi_offset %r15, -24
450 ; KNL-NEXT: movq %rdi, %rax
451 ; KNL-NEXT: shrq $32, %rax
452 ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp)
453 ; KNL-NEXT: movl $271, %eax ## imm = 0x10F
454 ; KNL-NEXT: bextrl %eax, %edi, %eax
455 ; KNL-NEXT: movl %edi, %ecx
456 ; KNL-NEXT: andl $1, %ecx
457 ; KNL-NEXT: vmovd %ecx, %xmm0
458 ; KNL-NEXT: movl $257, %ecx ## imm = 0x101
459 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
460 ; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
461 ; KNL-NEXT: movl $258, %ecx ## imm = 0x102
462 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
463 ; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
464 ; KNL-NEXT: movl $259, %ecx ## imm = 0x103
465 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
466 ; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
467 ; KNL-NEXT: movl $260, %ecx ## imm = 0x104
468 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
469 ; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
470 ; KNL-NEXT: movl $261, %ecx ## imm = 0x105
471 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
472 ; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
473 ; KNL-NEXT: movl $262, %ecx ## imm = 0x106
474 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
475 ; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
476 ; KNL-NEXT: movl $263, %ecx ## imm = 0x107
477 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
478 ; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
479 ; KNL-NEXT: movl $264, %ecx ## imm = 0x108
480 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
481 ; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
482 ; KNL-NEXT: movl $265, %ecx ## imm = 0x109
483 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
484 ; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
485 ; KNL-NEXT: movl $266, %ecx ## imm = 0x10A
486 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
487 ; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
488 ; KNL-NEXT: movl $267, %ecx ## imm = 0x10B
489 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
490 ; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
491 ; KNL-NEXT: movl $268, %ecx ## imm = 0x10C
492 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
493 ; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
494 ; KNL-NEXT: movl $269, %ecx ## imm = 0x10D
495 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
496 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
497 ; KNL-NEXT: movl $270, %ecx ## imm = 0x10E
498 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
499 ; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
500 ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
501 ; KNL-NEXT: movl $1, %eax
502 ; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm0
503 ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %r15d
504 ; KNL-NEXT: movq %r15, %rdx
505 ; KNL-NEXT: shrq $17, %rdx
506 ; KNL-NEXT: andb $1, %dl
507 ; KNL-NEXT: je LBB22_2
509 ; KNL-NEXT: movb $-1, %dl
511 ; KNL-NEXT: movq %r15, %r11
512 ; KNL-NEXT: shrq $16, %r11
513 ; KNL-NEXT: andb $1, %r11b
514 ; KNL-NEXT: je LBB22_4
516 ; KNL-NEXT: movb $-1, %r11b
518 ; KNL-NEXT: movq %r15, %r10
519 ; KNL-NEXT: shrq $18, %r10
520 ; KNL-NEXT: andb $1, %r10b
521 ; KNL-NEXT: je LBB22_6
523 ; KNL-NEXT: movb $-1, %r10b
525 ; KNL-NEXT: movq %r15, %r9
526 ; KNL-NEXT: shrq $19, %r9
527 ; KNL-NEXT: andb $1, %r9b
528 ; KNL-NEXT: je LBB22_8
530 ; KNL-NEXT: movb $-1, %r9b
532 ; KNL-NEXT: movq %r15, %rbx
533 ; KNL-NEXT: shrq $20, %rbx
534 ; KNL-NEXT: andb $1, %bl
535 ; KNL-NEXT: je LBB22_10
537 ; KNL-NEXT: movb $-1, %bl
538 ; KNL-NEXT: LBB22_10:
539 ; KNL-NEXT: movq %r15, %r12
540 ; KNL-NEXT: shrq $21, %r12
541 ; KNL-NEXT: andb $1, %r12b
542 ; KNL-NEXT: je LBB22_12
543 ; KNL-NEXT: ## BB#11:
544 ; KNL-NEXT: movb $-1, %r12b
545 ; KNL-NEXT: LBB22_12:
546 ; KNL-NEXT: movq %r15, %r14
547 ; KNL-NEXT: shrq $22, %r14
548 ; KNL-NEXT: andb $1, %r14b
549 ; KNL-NEXT: je LBB22_14
550 ; KNL-NEXT: ## BB#13:
551 ; KNL-NEXT: movb $-1, %r14b
552 ; KNL-NEXT: LBB22_14:
553 ; KNL-NEXT: movq %r15, %r8
554 ; KNL-NEXT: shrq $23, %r8
555 ; KNL-NEXT: andb $1, %r8b
556 ; KNL-NEXT: je LBB22_16
557 ; KNL-NEXT: ## BB#15:
558 ; KNL-NEXT: movb $-1, %r8b
559 ; KNL-NEXT: LBB22_16:
560 ; KNL-NEXT: movq %r15, %r13
561 ; KNL-NEXT: shrq $24, %r13
562 ; KNL-NEXT: andb $1, %r13b
563 ; KNL-NEXT: je LBB22_18
564 ; KNL-NEXT: ## BB#17:
565 ; KNL-NEXT: movb $-1, %r13b
566 ; KNL-NEXT: LBB22_18:
567 ; KNL-NEXT: movq %r15, %rax
568 ; KNL-NEXT: shrq $25, %rax
569 ; KNL-NEXT: andb $1, %al
570 ; KNL-NEXT: je LBB22_20
571 ; KNL-NEXT: ## BB#19:
572 ; KNL-NEXT: movb $-1, %al
573 ; KNL-NEXT: LBB22_20:
574 ; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
575 ; KNL-NEXT: movq %r15, %rax
576 ; KNL-NEXT: shrq $26, %rax
577 ; KNL-NEXT: andb $1, %al
578 ; KNL-NEXT: je LBB22_22
579 ; KNL-NEXT: ## BB#21:
580 ; KNL-NEXT: movb $-1, %al
581 ; KNL-NEXT: LBB22_22:
582 ; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
583 ; KNL-NEXT: movl $272, %esi ## imm = 0x110
584 ; KNL-NEXT: movq %r15, %rax
585 ; KNL-NEXT: shrq $27, %rax
586 ; KNL-NEXT: andb $1, %al
587 ; KNL-NEXT: je LBB22_24
588 ; KNL-NEXT: ## BB#23:
589 ; KNL-NEXT: movb $-1, %al
590 ; KNL-NEXT: LBB22_24:
591 ; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
592 ; KNL-NEXT: movl $273, %eax ## imm = 0x111
593 ; KNL-NEXT: bextrl %esi, %edi, %esi
594 ; KNL-NEXT: movq %r15, %rcx
595 ; KNL-NEXT: shrq $28, %rcx
596 ; KNL-NEXT: andb $1, %cl
597 ; KNL-NEXT: je LBB22_26
598 ; KNL-NEXT: ## BB#25:
599 ; KNL-NEXT: movb $-1, %cl
600 ; KNL-NEXT: LBB22_26:
601 ; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
602 ; KNL-NEXT: bextrl %eax, %edi, %eax
603 ; KNL-NEXT: vmovd %esi, %xmm2
604 ; KNL-NEXT: movl $274, %esi ## imm = 0x112
605 ; KNL-NEXT: movq %r15, %rcx
606 ; KNL-NEXT: shrq $29, %rcx
607 ; KNL-NEXT: andb $1, %cl
608 ; KNL-NEXT: je LBB22_28
609 ; KNL-NEXT: ## BB#27:
610 ; KNL-NEXT: movb $-1, %cl
611 ; KNL-NEXT: LBB22_28:
612 ; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
613 ; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
614 ; KNL-NEXT: bextrl %esi, %edi, %eax
615 ; KNL-NEXT: movzbl %r11b, %esi
616 ; KNL-NEXT: movq %r15, %rcx
617 ; KNL-NEXT: shrq $30, %rcx
618 ; KNL-NEXT: andb $1, %cl
619 ; KNL-NEXT: je LBB22_30
620 ; KNL-NEXT: ## BB#29:
621 ; KNL-NEXT: movb $-1, %cl
622 ; KNL-NEXT: LBB22_30:
623 ; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
624 ; KNL-NEXT: movl $275, %eax ## imm = 0x113
625 ; KNL-NEXT: bextrl %eax, %edi, %r11d
626 ; KNL-NEXT: movzbl %dl, %edx
627 ; KNL-NEXT: vmovd %esi, %xmm3
628 ; KNL-NEXT: movq %r15, %rax
629 ; KNL-NEXT: shrq $31, %rax
630 ; KNL-NEXT: andb $1, %al
631 ; KNL-NEXT: je LBB22_32
632 ; KNL-NEXT: ## BB#31:
633 ; KNL-NEXT: movb $-1, %al
634 ; KNL-NEXT: LBB22_32:
635 ; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
636 ; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
637 ; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
638 ; KNL-NEXT: movl $276, %eax ## imm = 0x114
639 ; KNL-NEXT: bextrl %eax, %edi, %esi
640 ; KNL-NEXT: movl $277, %r11d ## imm = 0x115
641 ; KNL-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
642 ; KNL-NEXT: movzbl %r10b, %r10d
643 ; KNL-NEXT: movb %r15b, %al
645 ; KNL-NEXT: andb $1, %al
646 ; KNL-NEXT: je LBB22_34
647 ; KNL-NEXT: ## BB#33:
648 ; KNL-NEXT: movb $-1, %al
649 ; KNL-NEXT: LBB22_34:
650 ; KNL-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2
651 ; KNL-NEXT: bextrl %r11d, %edi, %edx
652 ; KNL-NEXT: movl $278, %r11d ## imm = 0x116
653 ; KNL-NEXT: vpinsrb $2, %r10d, %xmm3, %xmm3
654 ; KNL-NEXT: movzbl %r9b, %esi
655 ; KNL-NEXT: movzbl %al, %eax
656 ; KNL-NEXT: movq %r15, %rcx
657 ; KNL-NEXT: shlq $63, %rcx
658 ; KNL-NEXT: sarq $63, %rcx
659 ; KNL-NEXT: vmovd %ecx, %xmm4
660 ; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
661 ; KNL-NEXT: movb %r15b, %al
662 ; KNL-NEXT: shrb $2, %al
663 ; KNL-NEXT: andb $1, %al
664 ; KNL-NEXT: je LBB22_36
665 ; KNL-NEXT: ## BB#35:
666 ; KNL-NEXT: movb $-1, %al
667 ; KNL-NEXT: LBB22_36:
668 ; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
669 ; KNL-NEXT: bextrl %r11d, %edi, %edx
670 ; KNL-NEXT: movl $279, %r9d ## imm = 0x117
671 ; KNL-NEXT: vpinsrb $3, %esi, %xmm3, %xmm3
672 ; KNL-NEXT: movzbl %bl, %ebx
673 ; KNL-NEXT: movzbl %al, %eax
674 ; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
675 ; KNL-NEXT: movb %r15b, %al
676 ; KNL-NEXT: shrb $3, %al
677 ; KNL-NEXT: andb $1, %al
678 ; KNL-NEXT: je LBB22_38
679 ; KNL-NEXT: ## BB#37:
680 ; KNL-NEXT: movb $-1, %al
681 ; KNL-NEXT: LBB22_38:
682 ; KNL-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
683 ; KNL-NEXT: bextrl %r9d, %edi, %edx
684 ; KNL-NEXT: movl $280, %esi ## imm = 0x118
685 ; KNL-NEXT: vpinsrb $4, %ebx, %xmm3, %xmm3
686 ; KNL-NEXT: movzbl %r12b, %ebx
687 ; KNL-NEXT: movzbl %al, %eax
688 ; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
689 ; KNL-NEXT: movb %r15b, %al
690 ; KNL-NEXT: shrb $4, %al
691 ; KNL-NEXT: andb $1, %al
692 ; KNL-NEXT: je LBB22_40
693 ; KNL-NEXT: ## BB#39:
694 ; KNL-NEXT: movb $-1, %al
695 ; KNL-NEXT: LBB22_40:
696 ; KNL-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
697 ; KNL-NEXT: bextrl %esi, %edi, %ecx
698 ; KNL-NEXT: movl $281, %edx ## imm = 0x119
699 ; KNL-NEXT: vpinsrb $5, %ebx, %xmm3, %xmm3
700 ; KNL-NEXT: movzbl %r14b, %esi
701 ; KNL-NEXT: movzbl %al, %eax
702 ; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
703 ; KNL-NEXT: movb %r15b, %al
704 ; KNL-NEXT: shrb $5, %al
705 ; KNL-NEXT: andb $1, %al
706 ; KNL-NEXT: je LBB22_42
707 ; KNL-NEXT: ## BB#41:
708 ; KNL-NEXT: movb $-1, %al
709 ; KNL-NEXT: LBB22_42:
710 ; KNL-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
711 ; KNL-NEXT: bextrl %edx, %edi, %ecx
712 ; KNL-NEXT: movl $282, %edx ## imm = 0x11A
713 ; KNL-NEXT: vpinsrb $6, %esi, %xmm3, %xmm3
714 ; KNL-NEXT: movzbl %r8b, %esi
715 ; KNL-NEXT: movzbl %al, %eax
716 ; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
717 ; KNL-NEXT: movb %r15b, %bl
718 ; KNL-NEXT: shrb $6, %bl
719 ; KNL-NEXT: andb $1, %bl
720 ; KNL-NEXT: je LBB22_44
721 ; KNL-NEXT: ## BB#43:
722 ; KNL-NEXT: movb $-1, %bl
723 ; KNL-NEXT: LBB22_44:
724 ; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
725 ; KNL-NEXT: bextrl %edx, %edi, %eax
726 ; KNL-NEXT: movl $283, %ecx ## imm = 0x11B
727 ; KNL-NEXT: vpinsrb $7, %esi, %xmm3, %xmm3
728 ; KNL-NEXT: movzbl %r13b, %esi
729 ; KNL-NEXT: movzbl %bl, %edx
730 ; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4
731 ; KNL-NEXT: movb %r15b, %bl
732 ; KNL-NEXT: shrb $7, %bl
733 ; KNL-NEXT: je LBB22_46
734 ; KNL-NEXT: ## BB#45:
735 ; KNL-NEXT: movb $-1, %bl
736 ; KNL-NEXT: LBB22_46:
737 ; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
738 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
739 ; KNL-NEXT: movl $284, %edx ## imm = 0x11C
740 ; KNL-NEXT: vpinsrb $8, %esi, %xmm3, %xmm3
741 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload
742 ; KNL-NEXT: movzbl %al, %esi
743 ; KNL-NEXT: movzbl %bl, %eax
744 ; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
745 ; KNL-NEXT: movq %r15, %rax
746 ; KNL-NEXT: shrq $8, %rax
747 ; KNL-NEXT: andb $1, %al
748 ; KNL-NEXT: je LBB22_48
749 ; KNL-NEXT: ## BB#47:
750 ; KNL-NEXT: movb $-1, %al
751 ; KNL-NEXT: LBB22_48:
752 ; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
753 ; KNL-NEXT: bextrl %edx, %edi, %ecx
754 ; KNL-NEXT: movl $285, %edx ## imm = 0x11D
755 ; KNL-NEXT: vpinsrb $9, %esi, %xmm3, %xmm3
756 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
757 ; KNL-NEXT: movzbl %sil, %esi
758 ; KNL-NEXT: movzbl %al, %eax
759 ; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
760 ; KNL-NEXT: movq %r15, %rax
761 ; KNL-NEXT: shrq $9, %rax
762 ; KNL-NEXT: andb $1, %al
763 ; KNL-NEXT: je LBB22_50
764 ; KNL-NEXT: ## BB#49:
765 ; KNL-NEXT: movb $-1, %al
766 ; KNL-NEXT: LBB22_50:
767 ; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
768 ; KNL-NEXT: bextrl %edx, %edi, %ecx
769 ; KNL-NEXT: movl $286, %edx ## imm = 0x11E
770 ; KNL-NEXT: vpinsrb $10, %esi, %xmm3, %xmm3
771 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
772 ; KNL-NEXT: movzbl %sil, %esi
773 ; KNL-NEXT: movzbl %al, %eax
774 ; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
775 ; KNL-NEXT: movq %r15, %rax
776 ; KNL-NEXT: shrq $10, %rax
777 ; KNL-NEXT: andb $1, %al
778 ; KNL-NEXT: je LBB22_52
779 ; KNL-NEXT: ## BB#51:
780 ; KNL-NEXT: movb $-1, %al
781 ; KNL-NEXT: LBB22_52:
782 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
783 ; KNL-NEXT: bextrl %edx, %edi, %edx
784 ; KNL-NEXT: vpinsrb $11, %esi, %xmm3, %xmm3
785 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
786 ; KNL-NEXT: movzbl %cl, %ecx
787 ; KNL-NEXT: movzbl %al, %eax
788 ; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
789 ; KNL-NEXT: movq %r15, %rax
790 ; KNL-NEXT: shrq $11, %rax
791 ; KNL-NEXT: andb $1, %al
792 ; KNL-NEXT: je LBB22_54
793 ; KNL-NEXT: ## BB#53:
794 ; KNL-NEXT: movb $-1, %al
795 ; KNL-NEXT: LBB22_54:
796 ; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
797 ; KNL-NEXT: shrl $31, %edi
798 ; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3
799 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
800 ; KNL-NEXT: movzbl %cl, %ecx
801 ; KNL-NEXT: movzbl %al, %eax
802 ; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
803 ; KNL-NEXT: movq %r15, %rax
804 ; KNL-NEXT: shrq $12, %rax
805 ; KNL-NEXT: andb $1, %al
806 ; KNL-NEXT: je LBB22_56
807 ; KNL-NEXT: ## BB#55:
808 ; KNL-NEXT: movb $-1, %al
809 ; KNL-NEXT: LBB22_56:
810 ; KNL-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2
811 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3
812 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
813 ; KNL-NEXT: movzbl %cl, %ecx
814 ; KNL-NEXT: movzbl %al, %eax
815 ; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
816 ; KNL-NEXT: movq %r15, %rax
817 ; KNL-NEXT: shrq $13, %rax
818 ; KNL-NEXT: andb $1, %al
819 ; KNL-NEXT: je LBB22_58
820 ; KNL-NEXT: ## BB#57:
821 ; KNL-NEXT: movb $-1, %al
822 ; KNL-NEXT: LBB22_58:
823 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
824 ; KNL-NEXT: vpinsrb $14, %ecx, %xmm3, %xmm2
825 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
826 ; KNL-NEXT: movzbl %cl, %ecx
827 ; KNL-NEXT: movzbl %al, %eax
828 ; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3
829 ; KNL-NEXT: movq %r15, %rax
830 ; KNL-NEXT: shrq $14, %rax
831 ; KNL-NEXT: andb $1, %al
832 ; KNL-NEXT: je LBB22_60
833 ; KNL-NEXT: ## BB#59:
834 ; KNL-NEXT: movb $-1, %al
835 ; KNL-NEXT: LBB22_60:
836 ; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
837 ; KNL-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1
838 ; KNL-NEXT: movzbl %al, %eax
839 ; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm2
840 ; KNL-NEXT: shrq $15, %r15
841 ; KNL-NEXT: andb $1, %r15b
842 ; KNL-NEXT: je LBB22_62
843 ; KNL-NEXT: ## BB#61:
844 ; KNL-NEXT: movb $-1, %r15b
845 ; KNL-NEXT: LBB22_62:
846 ; KNL-NEXT: movzbl %r15b, %eax
847 ; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
848 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
849 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
850 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
851 ; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
852 ; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
853 ; KNL-NEXT: leaq -40(%rbp), %rsp
854 ; KNL-NEXT: popq %rbx
855 ; KNL-NEXT: popq %r12
856 ; KNL-NEXT: popq %r13
857 ; KNL-NEXT: popq %r14
858 ; KNL-NEXT: popq %r15
859 ; KNL-NEXT: popq %rbp
864 ; SKX-NEXT: kmovq %rdi, %k0
865 ; SKX-NEXT: kxnorw %k0, %k0, %k1
866 ; SKX-NEXT: kshiftrw $15, %k1, %k1
867 ; SKX-NEXT: kshiftlq $5, %k1, %k1
868 ; SKX-NEXT: korq %k1, %k0, %k0
869 ; SKX-NEXT: vpmovm2b %k0, %zmm0
871 %a = bitcast i64 %x to <64 x i1>
872 %b = insertelement <64 x i1>%a, i1 true, i32 5
873 %c = sext <64 x i1>%b to <64 x i8>
877 define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
880 ; KNL-NEXT: pushq %rbp
882 ; KNL-NEXT: .cfi_def_cfa_offset 16
884 ; KNL-NEXT: .cfi_offset %rbp, -16
885 ; KNL-NEXT: movq %rsp, %rbp
887 ; KNL-NEXT: .cfi_def_cfa_register %rbp
888 ; KNL-NEXT: pushq %r15
889 ; KNL-NEXT: pushq %r14
890 ; KNL-NEXT: pushq %r13
891 ; KNL-NEXT: pushq %r12
892 ; KNL-NEXT: pushq %rbx
893 ; KNL-NEXT: andq $-32, %rsp
894 ; KNL-NEXT: subq $128, %rsp
896 ; KNL-NEXT: .cfi_offset %rbx, -56
898 ; KNL-NEXT: .cfi_offset %r12, -48
900 ; KNL-NEXT: .cfi_offset %r13, -40
902 ; KNL-NEXT: .cfi_offset %r14, -32
904 ; KNL-NEXT: .cfi_offset %r15, -24
905 ; KNL-NEXT: movq %rdi, %rax
906 ; KNL-NEXT: shrq $32, %rax
907 ; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp)
908 ; KNL-NEXT: movl %edi, %eax
909 ; KNL-NEXT: andl $1, %eax
910 ; KNL-NEXT: vmovd %eax, %xmm0
911 ; KNL-NEXT: movl $257, %eax ## imm = 0x101
912 ; KNL-NEXT: bextrl %eax, %edi, %eax
913 ; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
914 ; KNL-NEXT: movl $258, %eax ## imm = 0x102
915 ; KNL-NEXT: bextrl %eax, %edi, %eax
916 ; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
917 ; KNL-NEXT: movl $259, %eax ## imm = 0x103
918 ; KNL-NEXT: bextrl %eax, %edi, %eax
919 ; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
920 ; KNL-NEXT: movl $260, %eax ## imm = 0x104
921 ; KNL-NEXT: bextrl %eax, %edi, %eax
922 ; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
923 ; KNL-NEXT: movl $261, %eax ## imm = 0x105
924 ; KNL-NEXT: bextrl %eax, %edi, %eax
925 ; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
926 ; KNL-NEXT: movl $262, %eax ## imm = 0x106
927 ; KNL-NEXT: bextrl %eax, %edi, %eax
928 ; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
929 ; KNL-NEXT: movl $263, %eax ## imm = 0x107
930 ; KNL-NEXT: bextrl %eax, %edi, %eax
931 ; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
932 ; KNL-NEXT: movl $264, %eax ## imm = 0x108
933 ; KNL-NEXT: bextrl %eax, %edi, %eax
934 ; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
935 ; KNL-NEXT: movl $265, %eax ## imm = 0x109
936 ; KNL-NEXT: bextrl %eax, %edi, %eax
937 ; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
938 ; KNL-NEXT: movl $266, %eax ## imm = 0x10A
939 ; KNL-NEXT: bextrl %eax, %edi, %eax
940 ; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
941 ; KNL-NEXT: movl $267, %eax ## imm = 0x10B
942 ; KNL-NEXT: bextrl %eax, %edi, %eax
943 ; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
944 ; KNL-NEXT: movl $268, %eax ## imm = 0x10C
945 ; KNL-NEXT: bextrl %eax, %edi, %eax
946 ; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
947 ; KNL-NEXT: movl $269, %eax ## imm = 0x10D
948 ; KNL-NEXT: bextrl %eax, %edi, %eax
949 ; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
950 ; KNL-NEXT: movl $270, %eax ## imm = 0x10E
951 ; KNL-NEXT: bextrl %eax, %edi, %eax
952 ; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
953 ; KNL-NEXT: movl $271, %eax ## imm = 0x10F
954 ; KNL-NEXT: bextrl %eax, %edi, %eax
955 ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
956 ; KNL-NEXT: cmpl %edx, %esi
958 ; KNL-NEXT: movzbl %al, %eax
959 ; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm0
960 ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %r15d
961 ; KNL-NEXT: movq %r15, %rdx
962 ; KNL-NEXT: shrq $17, %rdx
963 ; KNL-NEXT: andb $1, %dl
964 ; KNL-NEXT: je LBB23_2
966 ; KNL-NEXT: movb $-1, %dl
968 ; KNL-NEXT: movq %r15, %r11
969 ; KNL-NEXT: shrq $16, %r11
970 ; KNL-NEXT: andb $1, %r11b
971 ; KNL-NEXT: je LBB23_4
973 ; KNL-NEXT: movb $-1, %r11b
975 ; KNL-NEXT: movq %r15, %r10
976 ; KNL-NEXT: shrq $18, %r10
977 ; KNL-NEXT: andb $1, %r10b
978 ; KNL-NEXT: je LBB23_6
980 ; KNL-NEXT: movb $-1, %r10b
982 ; KNL-NEXT: movq %r15, %r9
983 ; KNL-NEXT: shrq $19, %r9
984 ; KNL-NEXT: andb $1, %r9b
985 ; KNL-NEXT: je LBB23_8
987 ; KNL-NEXT: movb $-1, %r9b
989 ; KNL-NEXT: movq %r15, %rbx
990 ; KNL-NEXT: shrq $20, %rbx
991 ; KNL-NEXT: andb $1, %bl
992 ; KNL-NEXT: je LBB23_10
994 ; KNL-NEXT: movb $-1, %bl
995 ; KNL-NEXT: LBB23_10:
996 ; KNL-NEXT: movq %r15, %r12
997 ; KNL-NEXT: shrq $21, %r12
998 ; KNL-NEXT: andb $1, %r12b
999 ; KNL-NEXT: je LBB23_12
1000 ; KNL-NEXT: ## BB#11:
1001 ; KNL-NEXT: movb $-1, %r12b
1002 ; KNL-NEXT: LBB23_12:
1003 ; KNL-NEXT: movq %r15, %r14
1004 ; KNL-NEXT: shrq $22, %r14
1005 ; KNL-NEXT: andb $1, %r14b
1006 ; KNL-NEXT: je LBB23_14
1007 ; KNL-NEXT: ## BB#13:
1008 ; KNL-NEXT: movb $-1, %r14b
1009 ; KNL-NEXT: LBB23_14:
1010 ; KNL-NEXT: movq %r15, %r8
1011 ; KNL-NEXT: shrq $23, %r8
1012 ; KNL-NEXT: andb $1, %r8b
1013 ; KNL-NEXT: je LBB23_16
1014 ; KNL-NEXT: ## BB#15:
1015 ; KNL-NEXT: movb $-1, %r8b
1016 ; KNL-NEXT: LBB23_16:
1017 ; KNL-NEXT: movq %r15, %r13
1018 ; KNL-NEXT: shrq $24, %r13
1019 ; KNL-NEXT: andb $1, %r13b
1020 ; KNL-NEXT: je LBB23_18
1021 ; KNL-NEXT: ## BB#17:
1022 ; KNL-NEXT: movb $-1, %r13b
1023 ; KNL-NEXT: LBB23_18:
1024 ; KNL-NEXT: movq %r15, %rax
1025 ; KNL-NEXT: shrq $25, %rax
1026 ; KNL-NEXT: andb $1, %al
1027 ; KNL-NEXT: je LBB23_20
1028 ; KNL-NEXT: ## BB#19:
1029 ; KNL-NEXT: movb $-1, %al
1030 ; KNL-NEXT: LBB23_20:
1031 ; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
1032 ; KNL-NEXT: movq %r15, %rax
1033 ; KNL-NEXT: shrq $26, %rax
1034 ; KNL-NEXT: andb $1, %al
1035 ; KNL-NEXT: je LBB23_22
1036 ; KNL-NEXT: ## BB#21:
1037 ; KNL-NEXT: movb $-1, %al
1038 ; KNL-NEXT: LBB23_22:
1039 ; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
1040 ; KNL-NEXT: movl $272, %esi ## imm = 0x110
1041 ; KNL-NEXT: movq %r15, %rax
1042 ; KNL-NEXT: shrq $27, %rax
1043 ; KNL-NEXT: andb $1, %al
1044 ; KNL-NEXT: je LBB23_24
1045 ; KNL-NEXT: ## BB#23:
1046 ; KNL-NEXT: movb $-1, %al
1047 ; KNL-NEXT: LBB23_24:
1048 ; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
1049 ; KNL-NEXT: movl $273, %eax ## imm = 0x111
1050 ; KNL-NEXT: bextrl %esi, %edi, %esi
1051 ; KNL-NEXT: movq %r15, %rcx
1052 ; KNL-NEXT: shrq $28, %rcx
1053 ; KNL-NEXT: andb $1, %cl
1054 ; KNL-NEXT: je LBB23_26
1055 ; KNL-NEXT: ## BB#25:
1056 ; KNL-NEXT: movb $-1, %cl
1057 ; KNL-NEXT: LBB23_26:
1058 ; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
1059 ; KNL-NEXT: bextrl %eax, %edi, %eax
1060 ; KNL-NEXT: vmovd %esi, %xmm2
1061 ; KNL-NEXT: movl $274, %esi ## imm = 0x112
1062 ; KNL-NEXT: movq %r15, %rcx
1063 ; KNL-NEXT: shrq $29, %rcx
1064 ; KNL-NEXT: andb $1, %cl
1065 ; KNL-NEXT: je LBB23_28
1066 ; KNL-NEXT: ## BB#27:
1067 ; KNL-NEXT: movb $-1, %cl
1068 ; KNL-NEXT: LBB23_28:
1069 ; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
1070 ; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
1071 ; KNL-NEXT: bextrl %esi, %edi, %eax
1072 ; KNL-NEXT: movzbl %r11b, %esi
1073 ; KNL-NEXT: movq %r15, %rcx
1074 ; KNL-NEXT: shrq $30, %rcx
1075 ; KNL-NEXT: andb $1, %cl
1076 ; KNL-NEXT: je LBB23_30
1077 ; KNL-NEXT: ## BB#29:
1078 ; KNL-NEXT: movb $-1, %cl
1079 ; KNL-NEXT: LBB23_30:
1080 ; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
1081 ; KNL-NEXT: movl $275, %eax ## imm = 0x113
1082 ; KNL-NEXT: bextrl %eax, %edi, %r11d
1083 ; KNL-NEXT: movzbl %dl, %edx
1084 ; KNL-NEXT: vmovd %esi, %xmm3
1085 ; KNL-NEXT: movq %r15, %rax
1086 ; KNL-NEXT: shrq $31, %rax
1087 ; KNL-NEXT: andb $1, %al
1088 ; KNL-NEXT: je LBB23_32
1089 ; KNL-NEXT: ## BB#31:
1090 ; KNL-NEXT: movb $-1, %al
1091 ; KNL-NEXT: LBB23_32:
1092 ; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
1093 ; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
1094 ; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
1095 ; KNL-NEXT: movl $276, %eax ## imm = 0x114
1096 ; KNL-NEXT: bextrl %eax, %edi, %esi
1097 ; KNL-NEXT: movl $277, %r11d ## imm = 0x115
1098 ; KNL-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
1099 ; KNL-NEXT: movzbl %r10b, %r10d
1100 ; KNL-NEXT: movb %r15b, %al
1101 ; KNL-NEXT: shrb %al
1102 ; KNL-NEXT: andb $1, %al
1103 ; KNL-NEXT: je LBB23_34
1104 ; KNL-NEXT: ## BB#33:
1105 ; KNL-NEXT: movb $-1, %al
1106 ; KNL-NEXT: LBB23_34:
1107 ; KNL-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2
1108 ; KNL-NEXT: bextrl %r11d, %edi, %edx
1109 ; KNL-NEXT: movl $278, %r11d ## imm = 0x116
1110 ; KNL-NEXT: vpinsrb $2, %r10d, %xmm3, %xmm3
1111 ; KNL-NEXT: movzbl %r9b, %esi
1112 ; KNL-NEXT: movzbl %al, %eax
1113 ; KNL-NEXT: movq %r15, %rcx
1114 ; KNL-NEXT: shlq $63, %rcx
1115 ; KNL-NEXT: sarq $63, %rcx
1116 ; KNL-NEXT: vmovd %ecx, %xmm4
1117 ; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
1118 ; KNL-NEXT: movb %r15b, %al
1119 ; KNL-NEXT: shrb $2, %al
1120 ; KNL-NEXT: andb $1, %al
1121 ; KNL-NEXT: je LBB23_36
1122 ; KNL-NEXT: ## BB#35:
1123 ; KNL-NEXT: movb $-1, %al
1124 ; KNL-NEXT: LBB23_36:
1125 ; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
1126 ; KNL-NEXT: bextrl %r11d, %edi, %edx
1127 ; KNL-NEXT: movl $279, %r9d ## imm = 0x117
1128 ; KNL-NEXT: vpinsrb $3, %esi, %xmm3, %xmm3
1129 ; KNL-NEXT: movzbl %bl, %ebx
1130 ; KNL-NEXT: movzbl %al, %eax
1131 ; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
1132 ; KNL-NEXT: movb %r15b, %al
1133 ; KNL-NEXT: shrb $3, %al
1134 ; KNL-NEXT: andb $1, %al
1135 ; KNL-NEXT: je LBB23_38
1136 ; KNL-NEXT: ## BB#37:
1137 ; KNL-NEXT: movb $-1, %al
1138 ; KNL-NEXT: LBB23_38:
1139 ; KNL-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
1140 ; KNL-NEXT: bextrl %r9d, %edi, %edx
1141 ; KNL-NEXT: movl $280, %esi ## imm = 0x118
1142 ; KNL-NEXT: vpinsrb $4, %ebx, %xmm3, %xmm3
1143 ; KNL-NEXT: movzbl %r12b, %ebx
1144 ; KNL-NEXT: movzbl %al, %eax
1145 ; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
1146 ; KNL-NEXT: movb %r15b, %al
1147 ; KNL-NEXT: shrb $4, %al
1148 ; KNL-NEXT: andb $1, %al
1149 ; KNL-NEXT: je LBB23_40
1150 ; KNL-NEXT: ## BB#39:
1151 ; KNL-NEXT: movb $-1, %al
1152 ; KNL-NEXT: LBB23_40:
1153 ; KNL-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
1154 ; KNL-NEXT: bextrl %esi, %edi, %ecx
1155 ; KNL-NEXT: movl $281, %edx ## imm = 0x119
1156 ; KNL-NEXT: vpinsrb $5, %ebx, %xmm3, %xmm3
1157 ; KNL-NEXT: movzbl %r14b, %esi
1158 ; KNL-NEXT: movzbl %al, %eax
1159 ; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
1160 ; KNL-NEXT: movb %r15b, %al
1161 ; KNL-NEXT: shrb $5, %al
1162 ; KNL-NEXT: andb $1, %al
1163 ; KNL-NEXT: je LBB23_42
1164 ; KNL-NEXT: ## BB#41:
1165 ; KNL-NEXT: movb $-1, %al
1166 ; KNL-NEXT: LBB23_42:
1167 ; KNL-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
1168 ; KNL-NEXT: bextrl %edx, %edi, %ecx
1169 ; KNL-NEXT: movl $282, %edx ## imm = 0x11A
1170 ; KNL-NEXT: vpinsrb $6, %esi, %xmm3, %xmm3
1171 ; KNL-NEXT: movzbl %r8b, %esi
1172 ; KNL-NEXT: movzbl %al, %eax
1173 ; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
1174 ; KNL-NEXT: movb %r15b, %bl
1175 ; KNL-NEXT: shrb $6, %bl
1176 ; KNL-NEXT: andb $1, %bl
1177 ; KNL-NEXT: je LBB23_44
1178 ; KNL-NEXT: ## BB#43:
1179 ; KNL-NEXT: movb $-1, %bl
1180 ; KNL-NEXT: LBB23_44:
1181 ; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
1182 ; KNL-NEXT: bextrl %edx, %edi, %eax
1183 ; KNL-NEXT: movl $283, %ecx ## imm = 0x11B
1184 ; KNL-NEXT: vpinsrb $7, %esi, %xmm3, %xmm3
1185 ; KNL-NEXT: movzbl %r13b, %esi
1186 ; KNL-NEXT: movzbl %bl, %edx
1187 ; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4
1188 ; KNL-NEXT: movb %r15b, %bl
1189 ; KNL-NEXT: shrb $7, %bl
1190 ; KNL-NEXT: je LBB23_46
1191 ; KNL-NEXT: ## BB#45:
1192 ; KNL-NEXT: movb $-1, %bl
1193 ; KNL-NEXT: LBB23_46:
1194 ; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
1195 ; KNL-NEXT: bextrl %ecx, %edi, %ecx
1196 ; KNL-NEXT: movl $284, %edx ## imm = 0x11C
1197 ; KNL-NEXT: vpinsrb $8, %esi, %xmm3, %xmm3
1198 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload
1199 ; KNL-NEXT: movzbl %al, %esi
1200 ; KNL-NEXT: movzbl %bl, %eax
1201 ; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
1202 ; KNL-NEXT: movq %r15, %rax
1203 ; KNL-NEXT: shrq $8, %rax
1204 ; KNL-NEXT: andb $1, %al
1205 ; KNL-NEXT: je LBB23_48
1206 ; KNL-NEXT: ## BB#47:
1207 ; KNL-NEXT: movb $-1, %al
1208 ; KNL-NEXT: LBB23_48:
1209 ; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
1210 ; KNL-NEXT: bextrl %edx, %edi, %ecx
1211 ; KNL-NEXT: movl $285, %edx ## imm = 0x11D
1212 ; KNL-NEXT: vpinsrb $9, %esi, %xmm3, %xmm3
1213 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
1214 ; KNL-NEXT: movzbl %sil, %esi
1215 ; KNL-NEXT: movzbl %al, %eax
1216 ; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
1217 ; KNL-NEXT: movq %r15, %rax
1218 ; KNL-NEXT: shrq $9, %rax
1219 ; KNL-NEXT: andb $1, %al
1220 ; KNL-NEXT: je LBB23_50
1221 ; KNL-NEXT: ## BB#49:
1222 ; KNL-NEXT: movb $-1, %al
1223 ; KNL-NEXT: LBB23_50:
1224 ; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
1225 ; KNL-NEXT: bextrl %edx, %edi, %ecx
1226 ; KNL-NEXT: movl $286, %edx ## imm = 0x11E
1227 ; KNL-NEXT: vpinsrb $10, %esi, %xmm3, %xmm3
1228 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
1229 ; KNL-NEXT: movzbl %sil, %esi
1230 ; KNL-NEXT: movzbl %al, %eax
1231 ; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
1232 ; KNL-NEXT: movq %r15, %rax
1233 ; KNL-NEXT: shrq $10, %rax
1234 ; KNL-NEXT: andb $1, %al
1235 ; KNL-NEXT: je LBB23_52
1236 ; KNL-NEXT: ## BB#51:
1237 ; KNL-NEXT: movb $-1, %al
1238 ; KNL-NEXT: LBB23_52:
1239 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
1240 ; KNL-NEXT: bextrl %edx, %edi, %edx
1241 ; KNL-NEXT: vpinsrb $11, %esi, %xmm3, %xmm3
1242 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
1243 ; KNL-NEXT: movzbl %cl, %ecx
1244 ; KNL-NEXT: movzbl %al, %eax
1245 ; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
1246 ; KNL-NEXT: movq %r15, %rax
1247 ; KNL-NEXT: shrq $11, %rax
1248 ; KNL-NEXT: andb $1, %al
1249 ; KNL-NEXT: je LBB23_54
1250 ; KNL-NEXT: ## BB#53:
1251 ; KNL-NEXT: movb $-1, %al
1252 ; KNL-NEXT: LBB23_54:
1253 ; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
1254 ; KNL-NEXT: shrl $31, %edi
1255 ; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3
1256 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
1257 ; KNL-NEXT: movzbl %cl, %ecx
1258 ; KNL-NEXT: movzbl %al, %eax
1259 ; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
1260 ; KNL-NEXT: movq %r15, %rax
1261 ; KNL-NEXT: shrq $12, %rax
1262 ; KNL-NEXT: andb $1, %al
1263 ; KNL-NEXT: je LBB23_56
1264 ; KNL-NEXT: ## BB#55:
1265 ; KNL-NEXT: movb $-1, %al
1266 ; KNL-NEXT: LBB23_56:
1267 ; KNL-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2
1268 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3
1269 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
1270 ; KNL-NEXT: movzbl %cl, %ecx
1271 ; KNL-NEXT: movzbl %al, %eax
1272 ; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
1273 ; KNL-NEXT: movq %r15, %rax
1274 ; KNL-NEXT: shrq $13, %rax
1275 ; KNL-NEXT: andb $1, %al
1276 ; KNL-NEXT: je LBB23_58
1277 ; KNL-NEXT: ## BB#57:
1278 ; KNL-NEXT: movb $-1, %al
1279 ; KNL-NEXT: LBB23_58:
1280 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1281 ; KNL-NEXT: vpinsrb $14, %ecx, %xmm3, %xmm2
1282 ; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
1283 ; KNL-NEXT: movzbl %cl, %ecx
1284 ; KNL-NEXT: movzbl %al, %eax
1285 ; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3
1286 ; KNL-NEXT: movq %r15, %rax
1287 ; KNL-NEXT: shrq $14, %rax
1288 ; KNL-NEXT: andb $1, %al
1289 ; KNL-NEXT: je LBB23_60
1290 ; KNL-NEXT: ## BB#59:
1291 ; KNL-NEXT: movb $-1, %al
1292 ; KNL-NEXT: LBB23_60:
1293 ; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
1294 ; KNL-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1
1295 ; KNL-NEXT: movzbl %al, %eax
1296 ; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm2
1297 ; KNL-NEXT: shrq $15, %r15
1298 ; KNL-NEXT: andb $1, %r15b
1299 ; KNL-NEXT: je LBB23_62
1300 ; KNL-NEXT: ## BB#61:
1301 ; KNL-NEXT: movb $-1, %r15b
1302 ; KNL-NEXT: LBB23_62:
1303 ; KNL-NEXT: movzbl %r15b, %eax
1304 ; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
1305 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
1306 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1307 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1308 ; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
1309 ; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
1310 ; KNL-NEXT: leaq -40(%rbp), %rsp
1311 ; KNL-NEXT: popq %rbx
1312 ; KNL-NEXT: popq %r12
1313 ; KNL-NEXT: popq %r13
1314 ; KNL-NEXT: popq %r14
1315 ; KNL-NEXT: popq %r15
1316 ; KNL-NEXT: popq %rbp
1319 ; SKX-LABEL: test17:
1321 ; SKX-NEXT: kmovq %rdi, %k0
1322 ; SKX-NEXT: cmpl %edx, %esi
1323 ; SKX-NEXT: setg %al
1324 ; SKX-NEXT: andl $1, %eax
1325 ; SKX-NEXT: kmovw %eax, %k1
1326 ; SKX-NEXT: kshiftlq $5, %k1, %k1
1327 ; SKX-NEXT: korq %k1, %k0, %k0
1328 ; SKX-NEXT: vpmovm2b %k0, %zmm0
1330 %a = bitcast i64 %x to <64 x i1>
1331 %b = icmp sgt i32 %y, %z
1332 %c = insertelement <64 x i1>%a, i1 %b, i32 5
1333 %d = sext <64 x i1>%c to <64 x i8>
; test18: reinterprets %a as an 8-lane i1 mask and %y as a 16-lane i1 mask,
; then copies lane 8 of the 16-lane mask into lane 7 and lane 9 into lane 6
; of the 8-lane mask.
;
; The assertions below are autogenerated and must stay in sync with the IR.
; On the KNL run only word-sized mask instructions appear (kmovw, kshiftlw,
; kshiftrw, korw) and the result is widened through zmm0 and narrowed with
; vpmovqw. On the SKX run byte-sized forms (kmovb, kshiftlb, korb) are used
; and the mask is expanded with vpmovm2w.
;
; In both sequences a "kshiftlw $N" followed by "kshiftrw $15" isolates a
; single bit: the left shift moves bit (15 - N) to bit 15 and the right shift
; brings it down to bit 0. $7 therefore selects bit 8 and $6 selects bit 9.
;
; NOTE(review): both expected sequences merge the repositioned bits into the
; first mask with OR and show no instruction that clears lanes 6 and 7
; beforehand - confirm this matches the replace semantics of insertelement.
1337 define <8 x i1> @test18(i8 %a, i16 %y) {
1338 ; KNL-LABEL: test18:
1340 ; KNL-NEXT: movzbl %dil, %eax
1341 ; KNL-NEXT: kmovw %eax, %k0
1342 ; KNL-NEXT: kmovw %esi, %k1
1343 ; KNL-NEXT: kshiftlw $7, %k1, %k2
1344 ; KNL-NEXT: kshiftrw $15, %k2, %k2
1345 ; KNL-NEXT: kshiftlw $6, %k1, %k1
1346 ; KNL-NEXT: kshiftrw $15, %k1, %k1
1347 ; KNL-NEXT: kshiftlw $6, %k1, %k1
1348 ; KNL-NEXT: korw %k1, %k0, %k0
1349 ; KNL-NEXT: kshiftlw $7, %k2, %k1
1350 ; KNL-NEXT: korw %k1, %k0, %k1
1351 ; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
1352 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
1355 ; SKX-LABEL: test18:
1357 ; SKX-NEXT: kmovb %edi, %k0
1358 ; SKX-NEXT: kmovw %esi, %k1
1359 ; SKX-NEXT: kshiftlw $6, %k1, %k2
1360 ; SKX-NEXT: kshiftrw $15, %k2, %k2
1361 ; SKX-NEXT: kshiftlw $7, %k1, %k1
1362 ; SKX-NEXT: kshiftrw $15, %k1, %k1
1363 ; SKX-NEXT: kshiftlb $7, %k1, %k1
1364 ; SKX-NEXT: kshiftlb $6, %k2, %k2
1365 ; SKX-NEXT: korb %k2, %k0, %k0
1366 ; SKX-NEXT: korb %k1, %k0, %k0
1367 ; SKX-NEXT: vpmovm2w %k0, %xmm0
; Scalar arguments viewed as vectors of i1.
1369 %b = bitcast i8 %a to <8 x i1>
1370 %b1 = bitcast i16 %y to <16 x i1>
; Lanes 8 and 9 of the 16-lane mask are the values being transplanted.
1371 %el1 = extractelement <16 x i1>%b1, i32 8
1372 %el2 = extractelement <16 x i1>%b1, i32 9
; Lane 8 lands in lane 7, lane 9 lands in lane 6.
1373 %c = insertelement <8 x i1>%b, i1 %el1, i32 7
1374 %d = insertelement <8 x i1>%c, i1 %el2, i32 6
; test21: per-lane select between %x and zero under a 32-lane i1 mask
; (lanes whose mask bit is clear become zero).
;
; The assertions below are autogenerated and must stay in sync with the IR.
; On the KNL run the expected code handles the mask as bytes in ymm2, split
; into two 16-lane halves: each half is zero-extended to words (vpmovzxbw),
; bit 0 is smeared across the word with vpsllw $15 / vpsraw $15, and the
; result is ANDed with the matching half of the data (ymm0, ymm1).
; On the SKX run the mask bytes are shifted left by 7, converted to a mask
; register with vpmovb2m, and a masked vmovdqu16 copies the selected words
; of zmm0 into a zeroed zmm1, which is then moved back to zmm0.
1377 define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
1378 ; KNL-LABEL: test21:
1380 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
1381 ; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
1382 ; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
1383 ; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0
1384 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
1385 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
1386 ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
1387 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
1388 ; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1
1391 ; SKX-LABEL: test21:
1393 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
1394 ; SKX-NEXT: vpmovb2m %ymm1, %k1
1395 ; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
1396 ; SKX-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
1397 ; SKX-NEXT: vmovaps %zmm1, %zmm0
; Keep %x where the mask lane is set, otherwise produce zero.
1399 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
; test22: stores a 4-lane i1 vector to memory at %addr.
;
; The assertions below are autogenerated and must stay in sync with the IR.
; On the SKX run the expected code moves bit 0 of each dword lane into the
; sign position (vpslld $31), converts the lanes to a mask register with
; vpmovd2m, and writes it with a single kmovb.
; On the KNL run the expected code extracts dword lanes 3, 2, 1 and 0 in
; turn, masks each to its low bit, and writes one byte per lane.
;
; NOTE(review): all four KNL byte stores target the same address, (%rdi),
; so each one overwrites the previous and only lane 0's bit remains in
; memory. This looks like it pins current codegen rather than a packed mask
; store - confirm whether that is the intended expectation.
1403 define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
1404 ; KNL-LABEL: test22:
1406 ; KNL-NEXT: vpextrd $3, %xmm0, %eax
1407 ; KNL-NEXT: andl $1, %eax
1408 ; KNL-NEXT: movb %al, (%rdi)
1409 ; KNL-NEXT: vpextrd $2, %xmm0, %eax
1410 ; KNL-NEXT: andl $1, %eax
1411 ; KNL-NEXT: movb %al, (%rdi)
1412 ; KNL-NEXT: vpextrd $1, %xmm0, %eax
1413 ; KNL-NEXT: andl $1, %eax
1414 ; KNL-NEXT: movb %al, (%rdi)
1415 ; KNL-NEXT: vmovd %xmm0, %eax
1416 ; KNL-NEXT: andl $1, %eax
1417 ; KNL-NEXT: movb %al, (%rdi)
1420 ; SKX-LABEL: test22:
1422 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
1423 ; SKX-NEXT: vpmovd2m %xmm0, %k0
1424 ; SKX-NEXT: kmovb %k0, (%rdi)
; The only operation under test: a direct store of the i1 vector.
1426 store <4 x i1> %a, <4 x i1>* %addr
; test23: stores a 2-lane i1 vector to memory at %addr.
;
; The assertions below are autogenerated and must stay in sync with the IR.
; On the SKX run the expected code moves bit 0 of each qword lane into the
; sign position (vpsllq $63), converts the lanes to a mask register with
; vpmovq2m, and writes it with a single kmovb.
; On the KNL run the expected code extracts qword lane 1 and then lane 0,
; masks each to its low bit, and writes one byte per lane.
;
; NOTE(review): both KNL byte stores target the same address, (%rdi), so
; the second overwrites the first and only lane 0's bit remains in memory.
; Confirm whether that is the intended expectation.
1430 define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
1431 ; KNL-LABEL: test23:
1433 ; KNL-NEXT: vpextrq $1, %xmm0, %rax
1434 ; KNL-NEXT: andl $1, %eax
1435 ; KNL-NEXT: movb %al, (%rdi)
1436 ; KNL-NEXT: vmovq %xmm0, %rax
1437 ; KNL-NEXT: andl $1, %eax
1438 ; KNL-NEXT: movb %al, (%rdi)
1441 ; SKX-LABEL: test23:
1443 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
1444 ; SKX-NEXT: vpmovq2m %xmm0, %k0
1445 ; SKX-NEXT: kmovb %k0, (%rdi)
; The only operation under test: a direct store of the i1 vector.
1447 store <2 x i1> %a, <2 x i1>* %addr