1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
; test1: fcmp ole + select on <16 x float> should lower to vcmpleps writing
; mask k1, then a masked register move (no blend instruction needed).
; NOTE(review): this listing appears to elide some original lines (function
; ret / closing-brace lines and some CHECK-LABEL lines) — embedded numbering
; has gaps; confirm against the full file before editing CHECK sequences.
4 define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
7 ; KNL-NEXT: vcmpleps %zmm1, %zmm0, %k1
8 ; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
9 ; KNL-NEXT: vmovaps %zmm1, %zmm0
11 %mask = fcmp ole <16 x float> %x, %y
12 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
; test2: same pattern as test1 but for <8 x double> — expects vcmplepd and a
; masked vmovapd.
16 define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
19 ; KNL-NEXT: vcmplepd %zmm1, %zmm0, %k1
20 ; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1}
21 ; KNL-NEXT: vmovaps %zmm1, %zmm0
23 %mask = fcmp ole <8 x double> %x, %y
24 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
; test3: integer equality compare with one operand loaded from memory — the
; load should fold into the compare as vpcmpeqd (%rdi), not a separate vmovdqu.
28 define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
31 ; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
32 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
33 ; KNL-NEXT: vmovaps %zmm1, %zmm0
35 %y = load <16 x i32>, <16 x i32>* %yp, align 4
36 %mask = icmp eq <16 x i32> %x, %y
37 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test4_unsigned: unsigned >= maps onto the "not less-than unsigned" encoding
; vpcmpnltud (there is no direct uge immediate form).
41 define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
42 ; KNL-LABEL: test4_unsigned:
44 ; KNL-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
45 ; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
46 ; KNL-NEXT: vmovaps %zmm1, %zmm0
48 %mask = icmp uge <16 x i32> %x, %y
49 %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
; test5: 64-bit element equality compare + select — expects vpcmpeqq and a
; masked vmovdqa64.
53 define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
56 ; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
57 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
58 ; KNL-NEXT: vmovaps %zmm1, %zmm0
60 %mask = icmp eq <8 x i64> %x, %y
61 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
; test6_unsigned: unsigned > maps onto "not less-or-equal unsigned"
; (vpcmpnleuq), the AVX-512 encoding for ugt on quadword elements.
65 define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
66 ; KNL-LABEL: test6_unsigned:
68 ; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
69 ; KNL-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
70 ; KNL-NEXT: vmovaps %zmm1, %zmm0
72 %mask = icmp ugt <8 x i64> %x, %y
73 %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
; test7: 128-bit fcmp+select. KNL (no AVX512VL) falls back to the AVX
; vcmpltps + vblendvps sequence; SKX uses a k-register mask and a masked
; xmm move instead.
77 define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
80 ; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
81 ; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
82 ; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
86 ; SKX: vxorps %xmm2, %xmm2, %xmm2
87 ; SKX: vcmpltps %xmm2, %xmm0, %k1
88 ; SKX: vmovaps %xmm0, %xmm1 {%k1}
89 ; SKX: vmovaps %zmm1, %zmm0
92 %mask = fcmp olt <4 x float> %a, zeroinitializer
93 %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
; test8: <2 x double> analog of test7 — KNL blends with vblendvpd, SKX uses a
; k-masked vmovapd.
97 define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
100 ; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
101 ; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
102 ; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
106 ; SKX: vxorpd %xmm2, %xmm2, %xmm2
107 ; SKX: vcmpltpd %xmm2, %xmm0, %k1
108 ; SKX: vmovapd %xmm0, %xmm1 {%k1}
109 ; SKX: vmovaps %zmm1, %zmm0
111 %mask = fcmp olt <2 x double> %a, zeroinitializer
112 %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
; test9: <8 x i32> compare+select on KNL — CHECK lines show the 256-bit
; operation widened to zmm registers, using vpblendmd.
116 define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
119 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
120 ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
122 %mask = icmp eq <8 x i32> %x, %y
123 %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
; test10: <8 x float> compare+select — KNL widens to zmm and uses vblendmps;
; SKX (VLX) stays at ymm width with a masked vmovaps.
127 define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
130 ; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
131 ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
135 ; SKX: vcmpeqps %ymm1, %ymm0, %k1
136 ; SKX: vmovaps %ymm0, %ymm1 {%k1}
137 ; SKX: vmovaps %zmm1, %zmm0
140 %mask = fcmp oeq <8 x float> %x, %y
141 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
; test11_unsigned: icmp ugt + select of the same operands is the unsigned-max
; idiom — it should collapse to a single vpmaxud, with no mask at all.
145 define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
146 ; KNL-LABEL: test11_unsigned:
148 ; KNL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
150 %mask = icmp ugt <8 x i32> %x, %y
151 %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
; test12: <16 x i64> compare spans two zmm register pairs; the two 8-bit k
; results are concatenated with kunpckbw, then the i16 mask is moved out via
; kmovw for the bitcast to i16.
156 define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
159 ; KNL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
160 ; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
161 ; KNL-NEXT: kunpckbw %k0, %k1, %k0
162 ; KNL-NEXT: kmovw %k0, %eax
164 %res = icmp eq <16 x i64> %a, %b
165 %res1 = bitcast <16 x i1> %res to i16
; test13: zext of an fcmp mask to <16 x i32> lowers as a zero-masked
; broadcast of a constant from .rodata (vpbroadcastd ... {%k1} {z}).
169 define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
172 ; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
173 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
176 %cmpvector_i = fcmp oeq <16 x float> %a, %b
177 %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
; test14: sub / sgt-compare / sext / eq-zero / select chain. The CHECK lines
; pin the current codegen, including the back-to-back knotw pair — presumably
; a known missed optimization (knot(knot(k)) == k); update checks if the
; redundant knotw is ever folded away.
181 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
184 ; KNL-NEXT: vpsubd %zmm1, %zmm0, %zmm1
185 ; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
186 ; KNL-NEXT: knotw %k0, %k0
187 ; KNL-NEXT: knotw %k0, %k1
188 ; KNL-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
190 %sub_r = sub <16 x i32> %a, %b
191 %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
192 %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
193 %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
194 %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
; test15: 64-bit element version of test14 — same chain, same redundant
; double-knotw currently emitted (see note on test14).
198 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
201 ; KNL-NEXT: vpsubq %zmm1, %zmm0, %zmm1
202 ; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
203 ; KNL-NEXT: knotw %k0, %k0
204 ; KNL-NEXT: knotw %k0, %k1
205 ; KNL-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
207 %sub_r = sub <8 x i64> %a, %b
208 %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
209 %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
210 %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
211 %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
; test16: signed >= is commuted into "y <= x" so it encodes as vpcmpled with
; swapped operands (zmm0 and zmm1 reversed versus the IR order).
215 define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
218 ; KNL-NEXT: vpcmpled %zmm0, %zmm1, %k1
219 ; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
220 ; KNL-NEXT: vmovaps %zmm1, %zmm0
222 %mask = icmp sge <16 x i32> %x, %y
223 %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
; test17: signed > with the RHS loaded from memory — the load folds into
; vpcmpgtd (%rdi).
227 define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
230 ; KNL-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
231 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
232 ; KNL-NEXT: vmovaps %zmm1, %zmm0
234 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
235 %mask = icmp sgt <16 x i32> %x, %y
236 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test18: signed <= with a memory RHS — folds into vpcmpled (%rdi).
240 define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
243 ; KNL-NEXT: vpcmpled (%rdi), %zmm0, %k1
244 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
245 ; KNL-NEXT: vmovaps %zmm1, %zmm0
247 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
248 %mask = icmp sle <16 x i32> %x, %y
249 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test19: unsigned <= with a memory RHS — folds into vpcmpleud (%rdi).
253 define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
256 ; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1
257 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
258 ; KNL-NEXT: vmovaps %zmm1, %zmm0
260 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
261 %mask = icmp ule <16 x i32> %x, %y
262 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test20: AND of two compare masks — the second vpcmpeqd executes under the
; first result's mask ({%k1}) rather than emitting a separate kandw.
266 define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
269 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
270 ; KNL-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
271 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
272 ; KNL-NEXT: vmovaps %zmm1, %zmm0
274 %mask1 = icmp eq <16 x i32> %x1, %y1
275 %mask0 = icmp eq <16 x i32> %x, %y
276 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
277 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
; test21: combined sle/sge masks on <8 x i64> — the sge compare is commuted
; (operands swapped in vpcmpleq) and performed under the first mask.
281 define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
284 ; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
285 ; KNL-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
286 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
287 ; KNL-NEXT: vmovaps %zmm2, %zmm0
289 %mask1 = icmp sge <8 x i64> %x1, %y1
290 %mask0 = icmp sle <8 x i64> %x, %y
291 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
292 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test22: masked compare with a folded memory operand — the load feeds the
; second vpcmpgtq directly, executed under the first compare's mask.
296 define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
299 ; KNL-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
300 ; KNL-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
301 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
302 ; KNL-NEXT: vmovaps %zmm1, %zmm0
304 %mask1 = icmp sgt <8 x i64> %x1, %y1
305 %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
306 %mask0 = icmp sgt <8 x i64> %x, %y
307 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
308 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test23: mixes a signed compare (commuted sge -> vpcmpled) with an unsigned
; compare whose memory operand folds (vpcmpleud (%rdi)), chained via {%k1}.
312 define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
315 ; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
316 ; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
317 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
318 ; KNL-NEXT: vmovaps %zmm1, %zmm0
320 %mask1 = icmp sge <16 x i32> %x1, %y1
321 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
322 %mask0 = icmp ule <16 x i32> %x, %y
323 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
324 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test24: scalar load + insertelement + zero-shuffle is the splat idiom — it
; should fold into an embedded-broadcast compare: vpcmpeqq (%rdi){1to8}.
328 define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
331 ; KNL-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
332 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
333 ; KNL-NEXT: vmovaps %zmm1, %zmm0
335 %yb = load i64, i64* %yb.ptr, align 4
336 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
337 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
338 %mask = icmp eq <8 x i64> %x, %y
339 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test25: splat-compare with 32-bit elements — expects the embedded broadcast
; form vpcmpled (%rdi){1to16}.
343 define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
346 ; KNL-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
347 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
348 ; KNL-NEXT: vmovaps %zmm1, %zmm0
350 %yb = load i32, i32* %yb.ptr, align 4
351 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
352 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
353 %mask = icmp sle <16 x i32> %x, %y
354 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test26: combines a commuted sge compare with a broadcast-folded sgt compare
; ((%rdi){1to16}) executed under the first result mask.
358 define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
361 ; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
362 ; KNL-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
363 ; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
364 ; KNL-NEXT: vmovaps %zmm1, %zmm0
366 %mask1 = icmp sge <16 x i32> %x1, %y1
367 %yb = load i32, i32* %yb.ptr, align 4
368 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
369 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
370 %mask0 = icmp sgt <16 x i32> %x, %y
371 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
372 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test27: 64-bit element version of test26 — commuted sge, then a
; broadcast-folded sle compare ((%rdi){1to8}) under the first mask.
376 define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
379 ; KNL-NEXT: vpcmpleq %zmm1, %zmm2, %k1
380 ; KNL-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
381 ; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
382 ; KNL-NEXT: vmovaps %zmm1, %zmm0
384 %mask1 = icmp sge <8 x i64> %x1, %y1
385 %yb = load i64, i64* %yb.ptr, align 4
386 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
387 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
388 %mask0 = icmp sle <8 x i64> %x, %y
389 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
390 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test28: equality of two <8 x i1> mask values (derived from compares),
; sign-extended to <8 x i32>. No CHECK lines are visible for this function in
; this listing — presumably elided; confirm against the full file.
398 define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
399 %x_gt_y = icmp sgt <8 x i64> %x, %y
400 %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
401 %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
402 %resse = sext <8 x i1>%res to <8 x i32>
; test29: inequality (xor) of two <16 x i1> masks, sign-extended to
; <16 x i8>. As with test28, CHECK lines are not visible in this listing.
410 define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
411 %x_gt_y = icmp sgt <16 x i32> %x, %y
412 %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
413 %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
414 %resse = sext <16 x i1>%res to <16 x i8>
; test30: SKX-only checks — 256-bit fcmp oeq + select uses a ymm-width masked
; move (AVX512VL), no widening to zmm for the compare.
418 define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
420 ; SKX: vcmpeqpd %ymm1, %ymm0, %k1
421 ; SKX: vmovapd %ymm0, %ymm1 {%k1}
423 %mask = fcmp oeq <4 x double> %x, %y
424 %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
425 ret <4 x double> %max
; test31: SKX 128-bit fcmp olt with the RHS loaded from memory — the load
; folds into vcmpltpd (%rdi).
428 define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
430 ; SKX: vcmpltpd (%rdi), %xmm0, %k1
431 ; SKX: vmovapd %xmm0, %xmm1 {%k1}
433 %y = load <2 x double>, <2 x double>* %yp, align 4
434 %mask = fcmp olt <2 x double> %x, %y
435 %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
436 ret <2 x double> %max
; test32: fcmp ogt with the memory operand on the LHS — commuted to "x < y"
; so the load still folds into vcmpltpd (%rdi).
439 define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
441 ; SKX: vcmpltpd (%rdi), %ymm0, %k1
442 ; SKX: vmovapd %ymm0, %ymm1 {%k1}
444 %y = load <4 x double>, <4 x double>* %yp, align 4
445 %mask = fcmp ogt <4 x double> %y, %x
446 %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
447 ret <4 x double> %max
; test33: 512-bit version of the folded-load fcmp olt + select pattern.
450 define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
452 ; SKX: vcmpltpd (%rdi), %zmm0, %k1
453 ; SKX: vmovapd %zmm0, %zmm1 {%k1}
454 %y = load <8 x double>, <8 x double>* %yp, align 4
455 %mask = fcmp olt <8 x double> %x, %y
456 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
457 ret <8 x double> %max
; test34: <4 x float> folded-load fcmp olt + select (xmm width, SKX VLX).
460 define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
462 ; SKX: vcmpltps (%rdi), %xmm0, %k1
463 ; SKX: vmovaps %xmm0, %xmm1 {%k1}
464 %y = load <4 x float>, <4 x float>* %yp, align 4
465 %mask = fcmp olt <4 x float> %x, %y
466 %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
; test35: <8 x float> with ogt commuted so the memory operand folds into
; vcmpltps (%rdi) (ymm width).
470 define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
472 ; SKX: vcmpltps (%rdi), %ymm0, %k1
473 ; SKX: vmovaps %ymm0, %ymm1 {%k1}
475 %y = load <8 x float>, <8 x float>* %yp, align 4
476 %mask = fcmp ogt <8 x float> %y, %x
477 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
; test36: <16 x float> folded-load fcmp olt + select (zmm width).
481 define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
483 ; SKX: vcmpltps (%rdi), %zmm0, %k1
484 ; SKX: vmovaps %zmm0, %zmm1 {%k1}
485 %y = load <16 x float>, <16 x float>* %yp, align 4
486 %mask = fcmp olt <16 x float> %x, %y
487 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
488 ret <16 x float> %max
; test37: scalar-load splat compared against the vector — commuted ogt folds
; into an embedded-broadcast vcmpltpd (%rdi){1to8}.
491 define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
493 ; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1
494 ; SKX: vmovapd %zmm0, %zmm1 {%k1}
496 %a = load double, double* %ptr
497 %v = insertelement <8 x double> undef, double %a, i32 0
498 %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
500 %mask = fcmp ogt <8 x double> %shuffle, %x
501 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
502 ret <8 x double> %max
; test38: ymm-width version of test37 — broadcast compare (%rdi){1to4}.
505 define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
507 ; SKX: vcmpltpd (%rdi){1to4}, %ymm0, %k1
508 ; SKX: vmovapd %ymm0, %ymm1 {%k1}
510 %a = load double, double* %ptr
511 %v = insertelement <4 x double> undef, double %a, i32 0
512 %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
514 %mask = fcmp ogt <4 x double> %shuffle, %x
515 %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
516 ret <4 x double> %max
; test39: xmm-width version — broadcast compare (%rdi){1to2}.
519 define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
521 ; SKX: vcmpltpd (%rdi){1to2}, %xmm0, %k1
522 ; SKX: vmovapd %xmm0, %xmm1 {%k1}
524 %a = load double, double* %ptr
525 %v = insertelement <2 x double> undef, double %a, i32 0
526 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
528 %mask = fcmp ogt <2 x double> %shuffle, %x
529 %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
530 ret <2 x double> %max
; test40: single-precision splat-compare — broadcast fold (%rdi){1to16}.
534 define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
536 ; SKX: vcmpltps (%rdi){1to16}, %zmm0, %k1
537 ; SKX: vmovaps %zmm0, %zmm1 {%k1}
539 %a = load float, float* %ptr
540 %v = insertelement <16 x float> undef, float %a, i32 0
541 %shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
543 %mask = fcmp ogt <16 x float> %shuffle, %x
544 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
545 ret <16 x float> %max
; test41: ymm single-precision splat-compare — broadcast fold (%rdi){1to8}.
548 define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
550 ; SKX: vcmpltps (%rdi){1to8}, %ymm0, %k1
551 ; SKX: vmovaps %ymm0, %ymm1 {%k1}
553 %a = load float, float* %ptr
554 %v = insertelement <8 x float> undef, float %a, i32 0
555 %shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
557 %mask = fcmp ogt <8 x float> %shuffle, %x
558 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
; test42: xmm single-precision splat-compare — broadcast fold (%rdi){1to4}.
562 define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
564 ; SKX: vcmpltps (%rdi){1to4}, %xmm0, %k1
565 ; SKX: vmovaps %xmm0, %xmm1 {%k1}
567 %a = load float, float* %ptr
568 %v = insertelement <4 x float> undef, float %a, i32 0
569 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
571 %mask = fcmp ogt <4 x float> %shuffle, %x
572 %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
; test43: an incoming <8 x i1> mask argument (arriving in xmm2, converted with
; vpmovw2m) ANDed with a broadcast-compare result — the AND is done by running
; the compare under {%k1} rather than with a separate k-instruction.
576 define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
578 ; SKX: vpmovw2m %xmm2, %k1
579 ; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
580 ; SKX: vmovapd %zmm0, %zmm1 {%k1}
582 %a = load double, double* %ptr
583 %v = insertelement <8 x double> undef, double %a, i32 0
584 %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
586 %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
587 %mask = and <8 x i1> %mask_cmp, %mask_in
588 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
589 ret <8 x double> %max