1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; test1: fcmp ole on <16 x float> feeds a select between %x and %y.
; Expected assembly: vcmpleps produces mask %k1, a vmovaps masked by {%k1}
; moves %zmm0 into %zmm1, and %zmm1 is then copied back to %zmm0.
3 define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
6 ; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
7 ; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
8 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
10 %mask = fcmp ole <16 x float> %x, %y
11 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
; test2: fcmp ole on <8 x double> feeds a select between %x and %y.
; Expected assembly: vcmplepd produces mask %k1 and a vmovapd masked by {%k1}
; performs the select; the final register copy is matched as vmovaps.
15 define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
18 ; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
19 ; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
20 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
22 %mask = fcmp ole <8 x double> %x, %y
23 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
; test3: %y is loaded from %yp (align 4) and compared with icmp eq against %x;
; the mask selects between %x and %x1.
; Expected assembly: vpcmpeqd takes (%rdi) directly as its memory operand,
; followed by a vmovdqa32 masked by {%k1}.
27 define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
30 ; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
31 ; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
32 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
34 %y = load <16 x i32>, <16 x i32>* %yp, align 4
35 %mask = icmp eq <16 x i32> %x, %y
36 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test4_unsigned: unsigned icmp uge on <16 x i32>; the mask selects between
; %x1 and %y.
; Expected assembly: the uge predicate is matched as vpcmpnltud into %k1,
; followed by a vmovdqa32 of %zmm2 masked by {%k1}.
40 define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
41 ; CHECK-LABEL: test4_unsigned:
43 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
44 ; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
45 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
47 %mask = icmp uge <16 x i32> %x, %y
48 %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
; test5: icmp eq on <8 x i64> feeds a select between %x and %y.
; Expected assembly: vpcmpeqq produces mask %k1, then a vmovdqa64 masked by
; {%k1} performs the select.
52 define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
55 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
56 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
57 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
59 %mask = icmp eq <8 x i64> %x, %y
60 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
; test6_unsigned: unsigned icmp ugt on <8 x i64>; the mask selects between
; %x1 and %y.
; Expected assembly: the ugt predicate is matched as vpcmpnleuq into %k1,
; followed by a vmovdqa64 of %zmm2 masked by {%k1}.
64 define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
65 ; CHECK-LABEL: test6_unsigned:
67 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
68 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
69 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
71 %mask = icmp ugt <8 x i64> %x, %y
72 %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
; test7: 128-bit case. fcmp olt of <4 x float> %a against zeroinitializer
; selects between %a and %b.
; Expected assembly uses no k-register: vxorps zeroes %xmm2, vcmpltps writes
; its result to %xmm2, and vblendvps performs the select.
76 define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
79 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
80 ; CHECK-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
81 ; CHECK-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
83 %mask = fcmp olt <4 x float> %a, zeroinitializer
84 %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
; test8: 128-bit case. fcmp olt of <2 x double> %a against zeroinitializer
; selects between %a and %b.
; Expected assembly uses no k-register: vxorpd zeroes %xmm2, vcmpltpd writes
; its result to %xmm2, and vblendvpd performs the select.
88 define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
91 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
92 ; CHECK-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
93 ; CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
95 %mask = fcmp olt <2 x double> %a, zeroinitializer
96 %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
; test9: icmp eq on <8 x i32> (256-bit) feeds a select between %x and %y.
; Expected assembly does the compare (vpcmpeqd) and the blend (vpblendmd) on
; zmm registers; the "kill" annotation lines relate the ymm argument and
; result registers to their zmm counterparts.
100 define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
101 ; CHECK-LABEL: test9:
103 ; CHECK-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
104 ; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
105 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
106 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
107 ; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
109 %mask = icmp eq <8 x i32> %x, %y
110 %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
; test10: fcmp oeq on <8 x float> (256-bit) feeds a select between %x and %y.
; Expected assembly does the compare (vcmpeqps) and the blend (vblendmps) on
; zmm registers; the "kill" annotation lines relate the ymm argument and
; result registers to their zmm counterparts.
114 define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
115 ; CHECK-LABEL: test10:
117 ; CHECK-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
118 ; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
119 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
120 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
121 ; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
123 %mask = fcmp oeq <8 x float> %x, %y
124 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
; test11_unsigned: icmp ugt on <8 x i32> followed by a select of the same two
; operands (%x when greater, else %y).
; Expected assembly: the compare+select pair is matched as a single vpmaxud
; on ymm registers.
128 define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
129 ; CHECK-LABEL: test11_unsigned:
131 ; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
133 %mask = icmp ugt <8 x i32> %x, %y
134 %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
; test12: icmp eq on <16 x i64>, with the <16 x i1> result bitcast to i16.
; Expected assembly: two vpcmpeqq compares write %k0 and %k1, kunpckbw
; combines them into %k0, and kmovw moves the mask into %eax.
139 define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
140 ; CHECK-LABEL: test12:
142 ; CHECK-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
143 ; CHECK-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
144 ; CHECK-NEXT: kunpckbw %k0, %k1, %k0
145 ; CHECK-NEXT: kmovw %k0, %eax
146 ; CHECK-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
148 %res = icmp eq <16 x i64> %a, %b
149 %res1 = bitcast <16 x i1> %res to i16
; test13: fcmp oeq on <16 x float>, with the <16 x i1> result zero-extended
; to <16 x i32>.
; Expected assembly: vcmpeqps writes %k1, then a vpbroadcastd from a
; rip-relative operand under {%k1} {z} materialises the result in %zmm0.
153 define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
154 ; CHECK-LABEL: test13:
156 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
157 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
160 %cmpvector_i = fcmp oeq <16 x float> %a, %b
161 %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
; test14: computes %sub_r = %a - %b, compares %sub_r sgt %a, sign-extends that
; mask to <16 x i32>, compares the extension against zero, and uses the
; resulting mask to select zeroinitializer or %sub_r.
; Expected assembly: vpsubd, vpcmpgtd into %k0, two consecutive knotw
; instructions ending in %k1, then a vmovdqu32 under {%k1} {z}.
165 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
166 ; CHECK-LABEL: test14:
168 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1
169 ; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
170 ; CHECK-NEXT: knotw %k0, %k0
171 ; CHECK-NEXT: knotw %k0, %k1
172 ; CHECK-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
174 %sub_r = sub <16 x i32> %a, %b
175 %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
176 %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
177 %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
178 %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
; test15: <8 x i64> counterpart of the sub / sgt / sext / compare-with-zero /
; select sequence: %sub_r = %a - %b, mask = (%sub_r sgt %a), sign-extended and
; compared against zero, then used to select zeroinitializer or %sub_r.
; Expected assembly: vpsubq, vpcmpgtq into %k0, two consecutive knotw
; instructions ending in %k1, then a vmovdqu64 under {%k1} {z}.
182 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
183 ; CHECK-LABEL: test15:
185 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1
186 ; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
187 ; CHECK-NEXT: knotw %k0, %k0
188 ; CHECK-NEXT: knotw %k0, %k1
189 ; CHECK-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
191 %sub_r = sub <8 x i64> %a, %b
192 %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
193 %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
194 %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
195 %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
; test16: signed icmp sge of %x against %y on <16 x i32>; the mask selects
; between %x1 and %y.
; Expected assembly: the sge predicate is matched as vpcmpled with the operand
; order %zmm0, %zmm1, followed by a vmovdqa32 of %zmm2 masked by {%k1}.
199 define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
200 ; CHECK-LABEL: test16:
202 ; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
203 ; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
204 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
206 %mask = icmp sge <16 x i32> %x, %y
207 %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
; test17: %y is loaded from %y.ptr (align 4) and compared with signed icmp sgt
; (%x sgt %y); the mask selects between %x and %x1.
; Expected assembly: vpcmpgtd takes (%rdi) directly as its memory operand,
; followed by a vmovdqa32 masked by {%k1}.
211 define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
212 ; CHECK-LABEL: test17:
214 ; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
215 ; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
216 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
218 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
219 %mask = icmp sgt <16 x i32> %x, %y
220 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test18: %y is loaded from %y.ptr (align 4) and compared with signed icmp sle
; (%x sle %y); the mask selects between %x and %x1.
; Expected assembly: vpcmpled takes (%rdi) directly as its memory operand,
; followed by a vmovdqa32 masked by {%k1}.
224 define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
225 ; CHECK-LABEL: test18:
227 ; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
228 ; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
229 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
231 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
232 %mask = icmp sle <16 x i32> %x, %y
233 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test19: %y is loaded from %y.ptr (align 4) and compared with unsigned
; icmp ule (%x ule %y); the mask selects between %x and %x1.
; Expected assembly: vpcmpleud takes (%rdi) directly as its memory operand,
; followed by a vmovdqa32 masked by {%k1}.
237 define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
238 ; CHECK-LABEL: test19:
240 ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
241 ; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
242 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
244 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
245 %mask = icmp ule <16 x i32> %x, %y
246 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test20: two icmp eq masks on <16 x i32> are combined by selecting %mask1
; where %mask0 is set and zeroinitializer elsewhere; the combined mask then
; selects between %x and %y.
; Expected assembly: the first vpcmpeqd writes %k1 and the second vpcmpeqd is
; itself issued under {%k1}, so no separate mask-combining instruction appears.
250 define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
251 ; CHECK-LABEL: test20:
253 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
254 ; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
255 ; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
256 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
258 %mask1 = icmp eq <16 x i32> %x1, %y1
259 %mask0 = icmp eq <16 x i32> %x, %y
260 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
261 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
; test21: %mask1 (%x1 sge %y1) and %mask0 (%x sle %y) on <8 x i64> are combined
; by selecting %mask1 where %mask0 is set and zeroinitializer elsewhere; the
; combined mask selects between %x and %x1.
; Expected assembly: both predicates are matched as vpcmpleq (the sge one with
; operand order %zmm2, %zmm3), the second issued under {%k1}.
265 define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
266 ; CHECK-LABEL: test21:
268 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1
269 ; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
270 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
271 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
273 %mask1 = icmp sge <8 x i64> %x1, %y1
274 %mask0 = icmp sle <8 x i64> %x, %y
275 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
276 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test22: %mask1 (%x1 sgt %y1) is combined with %mask0 (%x sgt %y, where %y is
; loaded from %y.ptr with align 4) by selecting %mask1 where %mask0 is set and
; zeroinitializer elsewhere; the combined mask selects between %x and %x1.
; Expected assembly: a register vpcmpgtq writes %k1, then a vpcmpgtq with the
; (%rdi) memory operand is issued under {%k1}.
280 define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
281 ; CHECK-LABEL: test22:
283 ; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
284 ; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
285 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
286 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
288 %mask1 = icmp sgt <8 x i64> %x1, %y1
289 %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
290 %mask0 = icmp sgt <8 x i64> %x, %y
291 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
292 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test23: %mask1 (signed %x1 sge %y1) is combined with %mask0 (unsigned
; %x ule %y, where %y is loaded from %y.ptr with align 4) by selecting %mask1
; where %mask0 is set and zeroinitializer elsewhere; the combined mask selects
; between %x and %x1.
; Expected assembly: vpcmpled on registers writes %k1, then vpcmpleud with the
; (%rdi) memory operand is issued under {%k1}.
296 define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
297 ; CHECK-LABEL: test23:
299 ; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
300 ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
301 ; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
302 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
304 %mask1 = icmp sge <16 x i32> %x1, %y1
305 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
306 %mask0 = icmp ule <16 x i32> %x, %y
307 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
308 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test24: a scalar i64 is loaded from %yb.ptr and splatted to <8 x i64> via
; insertelement + shufflevector with a zeroinitializer shuffle mask; the splat
; is compared with icmp eq against %x and the mask selects between %x and %x1.
; Expected assembly: the load and splat appear as the (%rdi){1to8} operand of
; vpcmpeqq.
312 define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
313 ; CHECK-LABEL: test24:
315 ; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
316 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
317 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
319 %yb = load i64, i64* %yb.ptr, align 4
320 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
321 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
322 %mask = icmp eq <8 x i64> %x, %y
323 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
; test25: a scalar i32 is loaded from %yb.ptr and splatted to <16 x i32> via
; insertelement + shufflevector with a zeroinitializer shuffle mask; the splat
; is compared with signed icmp sle (%x sle %y) and the mask selects between
; %x and %x1.
; Expected assembly: the load and splat appear as the (%rdi){1to16} operand of
; vpcmpled.
327 define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
328 ; CHECK-LABEL: test25:
330 ; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
331 ; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
332 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
334 %yb = load i32, i32* %yb.ptr, align 4
335 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
336 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
337 %mask = icmp sle <16 x i32> %x, %y
338 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test26: %mask1 (%x1 sge %y1) is combined with %mask0 (%x sgt %y, where %y is
; a scalar i32 loaded from %yb.ptr and splatted via insertelement +
; shufflevector) by selecting %mask1 where %mask0 is set and zeroinitializer
; elsewhere; the combined mask selects between %x and %x1.
; Expected assembly: vpcmpled on registers writes %k1, then vpcmpgtd with the
; (%rdi){1to16} operand is issued under {%k1}.
342 define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
343 ; CHECK-LABEL: test26:
345 ; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
346 ; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
347 ; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
348 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
350 %mask1 = icmp sge <16 x i32> %x1, %y1
351 %yb = load i32, i32* %yb.ptr, align 4
352 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
353 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
354 %mask0 = icmp sgt <16 x i32> %x, %y
355 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
356 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
; test27: %mask1 (%x1 sge %y1) is combined with %mask0 (%x sle %y, where %y is
; a scalar i64 loaded from %yb.ptr and splatted via insertelement +
; shufflevector) by selecting %mask1 where %mask0 is set and zeroinitializer
; elsewhere; the combined mask selects between %x and %x1.
; Expected assembly: vpcmpleq on registers writes %k1, then vpcmpleq with the
; (%rdi){1to8} operand is issued under {%k1}.
360 define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
361 ; CHECK-LABEL: test27:
363 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1
364 ; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
365 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
366 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
368 %mask1 = icmp sge <8 x i64> %x1, %y1
369 %yb = load i64, i64* %yb.ptr, align 4
370 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
371 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
372 %mask0 = icmp sle <8 x i64> %x, %y
373 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
374 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1