1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; test1: lane-wise ordered "less than or equal" compare of two <16 x float>
; vectors, followed by a select driven by the resulting <16 x i1> mask.
7 define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; Mask lane is set when %x <= %y and neither operand is NaN (ordered compare).
8 %mask = fcmp ole <16 x float> %x, %y
; Keeps %x where the mask is set and %y elsewhere. Note: although the value is
; named %max, this keeps %x in the lanes where %x <= %y.
9 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
; test2: same shape as the <16 x float> case, but on <8 x double>: ordered
; "less than or equal" compare feeding a lane-wise select.
17 define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
; Mask lane is set when %x <= %y and neither operand is NaN.
18 %mask = fcmp ole <8 x double> %x, %y
; %x where the mask is set, %y elsewhere.
19 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
24 ; CHECK: vpcmpeqd (%rdi)
; test3: integer equality compare of %x against a <16 x i32> vector loaded
; from memory. The FileCheck pattern preceding this function expects the
; compare instruction to take (%rdi) as a direct memory operand.
27 define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
; Vector load from the pointer argument, declared with 4-byte alignment.
28 %y = load <16 x i32>* %yp, align 4
29 %mask = icmp eq <16 x i32> %x, %y
; %x in lanes where %x == %y, otherwise %x1.
30 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
34 ; CHECK-LABEL: @test4_unsigned
; test4_unsigned: unsigned "greater than or equal" compare of two <16 x i32>
; vectors, followed by a select on the resulting mask.
38 define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y) nounwind {
; uge treats the lanes as unsigned integers.
39 %mask = icmp uge <16 x i32> %x, %y
; %x where %x >= %y (unsigned), %y elsewhere.
40 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
45 ; CHECK: vpcmpeqq {{.*}}%k1
46 ; CHECK: vmovdqa64 {{.*}}%k1
; test5: equality compare of two <8 x i64> vectors feeding a select. The
; FileCheck patterns preceding this function expect a vpcmpeqq that involves
; %k1 and a vmovdqa64 that involves %k1.
48 define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
49 %mask = icmp eq <8 x i64> %x, %y
; %x where the lanes are equal, %y elsewhere.
50 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
54 ; CHECK-LABEL: test6_unsigned
55 ; CHECK: vpcmpnleuq {{.*}}%k1
56 ; CHECK: vmovdqa64 {{.*}}%k1
; test6_unsigned: unsigned "greater than" compare of two <8 x i64> vectors
; feeding a select. The FileCheck patterns preceding this function expect a
; vpcmpnleuq that involves %k1 and a vmovdqa64 that involves %k1.
58 define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y) nounwind {
; ugt treats the lanes as unsigned integers.
59 %mask = icmp ugt <8 x i64> %x, %y
; %x where %x > %y (unsigned), %y elsewhere.
60 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
; test7: compares a <4 x float> vector against zero with an ordered
; "less than" and selects between %a and %b on the result.
69 define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
; Mask lane is set when %a < 0.0 and %a is not NaN.
70 %mask = fcmp olt <4 x float> %a, zeroinitializer
; %a where the mask is set, %b elsewhere.
71 %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
; test8: <2 x double> counterpart of the compare-against-zero test: ordered
; "less than" against zeroinitializer, then a select between %a and %b.
80 define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
; Mask lane is set when %a < 0.0 and %a is not NaN.
81 %mask = fcmp olt <2 x double> %a, zeroinitializer
; %a where the mask is set, %b elsewhere.
82 %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
; test9: equality compare of two <8 x i32> vectors feeding a select.
90 define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
91 %mask = icmp eq <8 x i32> %x, %y
; %x where the lanes are equal, %y elsewhere.
92 %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
; test10: ordered equality compare of two <8 x float> vectors feeding a
; select.
100 define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; Mask lane is set when %x == %y and neither operand is NaN.
101 %mask = fcmp oeq <8 x float> %x, %y
; %x where the mask is set, %y elsewhere.
102 %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
106 ; CHECK-LABEL: test11_unsigned
; test11_unsigned: unsigned "greater than" compare of two <8 x i32> vectors
; feeding a select.
109 define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; ugt treats the lanes as unsigned integers.
110 %mask = icmp ugt <8 x i32> %x, %y
; %x where %x > %y (unsigned), %y elsewhere.
111 %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
115 ; CHECK-LABEL: test12
116 ; CHECK: vpcmpeqq %zmm2, %zmm0, [[LO:%k[0-7]]]
117 ; CHECK: vpcmpeqq %zmm3, %zmm1, [[HI:%k[0-7]]]
118 ; CHECK: kunpckbw [[LO]], [[HI]], {{%k[0-7]}}
; test12: equality compare of two <16 x i64> vectors whose 16-bit mask is
; returned as a scalar i16. The FileCheck patterns preceding this function
; expect two vpcmpeqq compares on zmm registers, each writing a mask register,
; and a kunpckbw that combines the two masks.
120 define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
121 %res = icmp eq <16 x i64> %a, %b
; Reinterprets the sixteen i1 lanes as one 16-bit integer.
122 %res1 = bitcast <16 x i1> %res to i16
126 ; CHECK-LABEL: test13
127 ; CHECK: vcmpeqps %zmm
128 ; CHECK: vpbroadcastd
; test13: ordered equality compare of two <16 x float> vectors, with the
; <16 x i1> result zero-extended to <16 x i32>. The FileCheck patterns
; preceding this function expect a vcmpeqps on a zmm register and a
; vpbroadcastd.
130 define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
132 %cmpvector_i = fcmp oeq <16 x float> %a, %b
; Each lane becomes 1 where the compare was true and 0 where it was false.
133 %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
137 ; CHECK-LABEL: test14
140 ; CHECK: vmovdqu32 {{.*}}{%k1} {z}
; test14: subtract, compare the difference against %a, and zero out the lanes
; where the compare failed. The FileCheck pattern preceding this function
; expects a vmovdqu32 using %k1 with zeroing ({z}).
142 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
143 %sub_r = sub <16 x i32> %a, %b
; Signed compare: set where (%a - %b) > %a.
144 %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
; Widens each i1 to all-ones (true) or zero (false).
145 %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
; Set exactly in the lanes where the signed compare above was false.
146 %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
; Zero where the compare was false, the difference where it was true.
147 %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
151 ; CHECK-LABEL: test15
154 ; CHECK: vmovdqu64 {{.*}}{%k1} {z}
; test15: <8 x i64> counterpart of the subtract / compare / zero-out test.
; The FileCheck pattern preceding this function expects a vmovdqu64 using %k1
; with zeroing ({z}).
156 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
157 %sub_r = sub <8 x i64> %a, %b
; Signed compare: set where (%a - %b) > %a.
158 %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
; Widens each i1 to all-ones (true) or zero (false).
159 %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
; Set exactly in the lanes where the signed compare above was false.
160 %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
; Zero where the compare was false, the difference where it was true.
161 %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
165 ; CHECK-LABEL: @test16
; test16: signed "greater than or equal" compare of two <16 x i32> vectors
; feeding a select.
169 define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y) nounwind {
170 %mask = icmp sge <16 x i32> %x, %y
; %x where %x >= %y (signed), %y elsewhere.
171 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
175 ; CHECK-LABEL: @test17
176 ; CHECK: vpcmpgtd (%rdi)
; test17: signed "greater than" compare of %x against a <16 x i32> vector
; loaded from memory. The FileCheck pattern preceding this function expects a
; vpcmpgtd that takes (%rdi) as a direct memory operand.
179 define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; Vector load from the pointer argument, declared with 4-byte alignment.
180 %y = load <16 x i32>* %y.ptr, align 4
181 %mask = icmp sgt <16 x i32> %x, %y
; %x where %x > %y (signed), %x1 elsewhere.
182 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
186 ; CHECK-LABEL: @test18
187 ; CHECK: vpcmpled (%rdi)
; test18: signed "less than or equal" compare of %x against a <16 x i32>
; vector loaded from memory. The FileCheck pattern preceding this function
; expects a vpcmpled that takes (%rdi) as a direct memory operand.
190 define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; Vector load from the pointer argument, declared with 4-byte alignment.
191 %y = load <16 x i32>* %y.ptr, align 4
192 %mask = icmp sle <16 x i32> %x, %y
; %x where %x <= %y (signed), %x1 elsewhere.
193 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
197 ; CHECK-LABEL: @test19
198 ; CHECK: vpcmpleud (%rdi)
; test19: unsigned "less than or equal" compare of %x against a <16 x i32>
; vector loaded from memory. The FileCheck pattern preceding this function
; expects a vpcmpleud that takes (%rdi) as a direct memory operand.
201 define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; Vector load from the pointer argument, declared with 4-byte alignment.
202 %y = load <16 x i32>* %y.ptr, align 4
; ule treats the lanes as unsigned integers.
203 %mask = icmp ule <16 x i32> %x, %y
; %x where %x <= %y (unsigned), %x1 elsewhere.
204 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
208 ; CHECK-LABEL: @test20
209 ; CHECK: vpcmpeqd %zmm{{.*{%k[1-7]}}}
; test20: two equality compares whose masks are combined before driving a
; select. The FileCheck pattern preceding this function expects a vpcmpeqd on
; a zmm register that is itself predicated by one of the mask registers k1-k7.
212 define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
213 %mask1 = icmp eq <16 x i32> %x1, %y1
214 %mask0 = icmp eq <16 x i32> %x, %y
; select(mask0, mask1, 0) is set only where both masks are set (lane-wise AND).
215 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
; %x where both compares were true, %y elsewhere.
216 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
220 ; CHECK-LABEL: @test21
221 ; CHECK: vpcmpleq %zmm{{.*{%k[1-7]}}}
; test21: a signed >= compare and a signed <= compare on <8 x i64> vectors,
; combined into one mask that drives a select. The FileCheck pattern preceding
; this function expects a vpcmpleq on a zmm register predicated by one of the
; mask registers k1-k7.
224 define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
225 %mask1 = icmp sge <8 x i64> %x1, %y1
226 %mask0 = icmp sle <8 x i64> %x, %y
; select(mask0, mask1, 0) is set only where both masks are set (lane-wise AND).
227 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
; %x where both compares were true, %x1 elsewhere.
228 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
232 ; CHECK-LABEL: @test22
233 ; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}}
; test22: two signed "greater than" compares on <8 x i64>, one of them against
; a vector loaded from memory, combined into one mask that drives a select.
; The FileCheck pattern preceding this function expects a vpcmpgtq with a
; (%rdi) memory operand, predicated by one of the mask registers k1-k7.
236 define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
237 %mask1 = icmp sgt <8 x i64> %x1, %y1
; Vector load from the pointer argument, declared with 4-byte alignment.
238 %y = load <8 x i64>* %y.ptr, align 4
239 %mask0 = icmp sgt <8 x i64> %x, %y
; select(mask0, mask1, 0) is set only where both masks are set (lane-wise AND).
240 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
; %x where both compares were true, %x1 elsewhere.
241 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
245 ; CHECK-LABEL: @test23
246 ; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}}
; test23: a signed >= compare and an unsigned <= compare (the latter against a
; vector loaded from memory) on <16 x i32>, combined into one mask that drives
; a select. The FileCheck pattern preceding this function expects a vpcmpleud
; with a (%rdi) memory operand, predicated by one of the mask registers k1-k7.
249 define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
250 %mask1 = icmp sge <16 x i32> %x1, %y1
; Vector load from the pointer argument, declared with 4-byte alignment.
251 %y = load <16 x i32>* %y.ptr, align 4
; ule treats the lanes as unsigned integers.
252 %mask0 = icmp ule <16 x i32> %x, %y
; select(mask0, mask1, 0) is set only where both masks are set (lane-wise AND).
253 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
; %x where both compares were true, %x1 elsewhere.
254 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
258 ; CHECK-LABEL: test24
259 ; CHECK: vpcmpeqq (%rdi){1to8}
; test24: equality compare of %x against a scalar i64 loaded from memory and
; splatted to all eight lanes. The FileCheck pattern preceding this function
; expects a vpcmpeqq with a broadcast memory operand, (%rdi){1to8}.
262 define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; Scalar load, declared with 4-byte alignment.
263 %yb = load i64* %yb.ptr, align 4
; Place the scalar in lane 0 ...
264 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
; ... then replicate lane 0 everywhere (an all-zero shuffle mask is a splat).
265 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
266 %mask = icmp eq <8 x i64> %x, %y
; %x where %x equals the splatted scalar, %x1 elsewhere.
267 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
271 ; CHECK-LABEL: test25
272 ; CHECK: vpcmpled (%rdi){1to16}
; test25: signed "less than or equal" compare of %x against a scalar i32
; loaded from memory and splatted to all sixteen lanes. The FileCheck pattern
; preceding this function expects a vpcmpled with a broadcast memory operand,
; (%rdi){1to16}.
275 define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
; Scalar load, declared with 4-byte alignment.
276 %yb = load i32* %yb.ptr, align 4
; Place the scalar in lane 0 ...
277 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
; ... then replicate lane 0 everywhere (an all-zero shuffle mask is a splat).
278 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
279 %mask = icmp sle <16 x i32> %x, %y
; %x where %x <= the splatted scalar (signed), %x1 elsewhere.
280 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
284 ; CHECK-LABEL: test26
285 ; CHECK: vpcmpgtd (%rdi){1to16}{{.*{%k[1-7]}}}
; test26: a signed >= compare combined with a signed > compare against a
; splatted scalar i32 loaded from memory; the combined mask drives a select.
; The FileCheck pattern preceding this function expects a vpcmpgtd with a
; broadcast memory operand, (%rdi){1to16}, predicated by one of the mask
; registers k1-k7.
288 define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
289 %mask1 = icmp sge <16 x i32> %x1, %y1
; Scalar load, declared with 4-byte alignment.
290 %yb = load i32* %yb.ptr, align 4
; Place the scalar in lane 0 ...
291 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
; ... then replicate lane 0 everywhere (an all-zero shuffle mask is a splat).
292 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
293 %mask0 = icmp sgt <16 x i32> %x, %y
; select(mask0, mask1, 0) is set only where both masks are set (lane-wise AND).
294 %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
; %x where both compares were true, %x1 elsewhere.
295 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
299 ; CHECK-LABEL: test27
300 ; CHECK: vpcmpleq (%rdi){1to8}{{.*{%k[1-7]}}}
; test27: a signed >= compare combined with a signed <= compare against a
; splatted scalar i64 loaded from memory; the combined mask drives a select.
; The FileCheck pattern preceding this function expects a vpcmpleq with a
; broadcast memory operand, (%rdi){1to8}, predicated by one of the mask
; registers k1-k7.
303 define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
304 %mask1 = icmp sge <8 x i64> %x1, %y1
; Scalar load, declared with 4-byte alignment.
305 %yb = load i64* %yb.ptr, align 4
; Place the scalar in lane 0 ...
306 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
; ... then replicate lane 0 everywhere (an all-zero shuffle mask is a splat).
307 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
308 %mask0 = icmp sle <8 x i64> %x, %y
; select(mask0, mask1, 0) is set only where both masks are set (lane-wise AND).
309 %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
; %x where both compares were true, %x1 elsewhere.
310 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1