1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s
5 ; CHECK-LABEL: vpaddq256_test
6 ; CHECK: vpaddq %ymm{{.*}}
8 define <4 x i64> @vpaddq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
9 %x = add <4 x i64> %i, %j
13 ; CHECK-LABEL: vpaddq256_fold_test
14 ; CHECK: vpaddq (%rdi), %ymm{{.*}}
16 define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, <4 x i64>* %j) nounwind {
17 %tmp = load <4 x i64>, <4 x i64>* %j, align 4
18 %x = add <4 x i64> %i, %tmp
22 ; CHECK-LABEL: vpaddq256_broadcast_test
23 ; CHECK: vpaddq LCP{{.*}}(%rip){1to4}, %ymm{{.*}}
25 define <4 x i64> @vpaddq256_broadcast_test(<4 x i64> %i) nounwind {
26 %x = add <4 x i64> %i, <i64 1, i64 1, i64 1, i64 1>
30 ; CHECK-LABEL: vpaddq256_broadcast2_test
31 ; CHECK: vpaddq (%rdi){1to4}, %ymm{{.*}}
33 define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, i64* %j.ptr) nounwind {
34 %j = load i64, i64* %j.ptr
35 %j.0 = insertelement <4 x i64> undef, i64 %j, i32 0
36 %j.v = shufflevector <4 x i64> %j.0, <4 x i64> undef, <4 x i32> zeroinitializer
37 %x = add <4 x i64> %i, %j.v
41 ; CHECK-LABEL: vpaddd256_test
42 ; CHECK: vpaddd %ymm{{.*}}
44 define <8 x i32> @vpaddd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
45 %x = add <8 x i32> %i, %j
49 ; CHECK-LABEL: vpaddd256_fold_test
50 ; CHECK: vpaddd (%rdi), %ymm{{.*}}
52 define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, <8 x i32>* %j) nounwind {
53 %tmp = load <8 x i32>, <8 x i32>* %j, align 4
54 %x = add <8 x i32> %i, %tmp
58 ; CHECK-LABEL: vpaddd256_broadcast_test
59 ; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*}}
61 define <8 x i32> @vpaddd256_broadcast_test(<8 x i32> %i) nounwind {
62 %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
66 ; CHECK-LABEL: vpaddd256_mask_test
67 ; CHECK: vpaddd %ymm{{.*%k[1-7].*}}
69 define <8 x i32> @vpaddd256_mask_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
70 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
71 %x = add <8 x i32> %i, %j
72 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
76 ; CHECK-LABEL: vpaddd256_maskz_test
77 ; CHECK: vpaddd %ymm{{.*{%k[1-7]} {z}.*}}
79 define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
80 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
81 %x = add <8 x i32> %i, %j
82 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
86 ; CHECK-LABEL: vpaddd256_mask_fold_test
87 ; CHECK: vpaddd (%rdi), %ymm{{.*%k[1-7]}}
89 define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
90 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
91 %j = load <8 x i32>, <8 x i32>* %j.ptr
92 %x = add <8 x i32> %i, %j
93 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
97 ; CHECK-LABEL: vpaddd256_mask_broadcast_test
98 ; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]}}}
100 define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
101 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
102 %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
103 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
107 ; CHECK-LABEL: vpaddd256_maskz_fold_test
108 ; CHECK: vpaddd (%rdi), %ymm{{.*{%k[1-7]} {z}}}
110 define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
111 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
112 %j = load <8 x i32>, <8 x i32>* %j.ptr
113 %x = add <8 x i32> %i, %j
114 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
118 ; CHECK-LABEL: vpaddd256_maskz_broadcast_test
119 ; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]} {z}}}
121 define <8 x i32> @vpaddd256_maskz_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
122 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
123 %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
124 %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
128 ; CHECK-LABEL: vpsubq256_test
129 ; CHECK: vpsubq %ymm{{.*}}
131 define <4 x i64> @vpsubq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
132 %x = sub <4 x i64> %i, %j
136 ; CHECK-LABEL: vpsubd256_test
137 ; CHECK: vpsubd %ymm{{.*}}
139 define <8 x i32> @vpsubd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
140 %x = sub <8 x i32> %i, %j
144 ; CHECK-LABEL: vpmulld256_test
145 ; CHECK: vpmulld %ymm{{.*}}
147 define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) {
148 %x = mul <8 x i32> %i, %j
152 ; CHECK-LABEL: test_vaddpd_256
153 ; CHECK: vaddpd{{.*}}
155 define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
157 %add.i = fadd <4 x double> %x, %y
158 ret <4 x double> %add.i
161 ; CHECK-LABEL: test_fold_vaddpd_256
162 ; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
164 define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
166 %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
167 ret <4 x double> %add.i
170 ; CHECK-LABEL: test_broadcast_vaddpd_256
171 ; CHECK: vaddps LCP{{.*}}(%rip){1to8}, %ymm0, %ymm0
173 define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
174 %b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
178 ; CHECK-LABEL: test_mask_vaddps_256
179 ; CHECK: vaddps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
181 define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i,
182 <8 x float> %j, <8 x i32> %mask1)
184 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
185 %x = fadd <8 x float> %i, %j
186 %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
190 ; CHECK-LABEL: test_mask_vmulps_256
191 ; CHECK: vmulps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
193 define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i,
194 <8 x float> %j, <8 x i32> %mask1)
196 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
197 %x = fmul <8 x float> %i, %j
198 %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
202 ; CHECK-LABEL: test_mask_vminps_256
203 ; CHECK: vminps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
205 define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i,
206 <8 x float> %j, <8 x i32> %mask1)
208 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
209 %cmp_res = fcmp olt <8 x float> %i, %j
210 %min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
211 %r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst
215 ; CHECK-LABEL: test_mask_vmaxps_256
216 ; CHECK: vmaxps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
218 define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i,
219 <8 x float> %j, <8 x i32> %mask1)
221 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
222 %cmp_res = fcmp ogt <8 x float> %i, %j
223 %max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
224 %r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst
228 ; CHECK-LABEL: test_mask_vsubps_256
229 ; CHECK: vsubps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
231 define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i,
232 <8 x float> %j, <8 x i32> %mask1)
234 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
235 %x = fsub <8 x float> %i, %j
236 %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
240 ; CHECK-LABEL: test_mask_vdivps_256
241 ; CHECK: vdivps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
243 define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i,
244 <8 x float> %j, <8 x i32> %mask1)
246 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
247 %x = fdiv <8 x float> %i, %j
248 %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
252 ; CHECK-LABEL: test_mask_vmulpd_256
253 ; CHECK: vmulpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
255 define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i,
256 <4 x double> %j, <4 x i64> %mask1)
258 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
259 %x = fmul <4 x double> %i, %j
260 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
264 ; CHECK-LABEL: test_mask_vminpd_256
265 ; CHECK: vminpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
267 define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i,
268 <4 x double> %j, <4 x i64> %mask1)
270 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
271 %cmp_res = fcmp olt <4 x double> %i, %j
272 %min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
273 %r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst
277 ; CHECK-LABEL: test_mask_vmaxpd_256
278 ; CHECK: vmaxpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
280 define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i,
281 <4 x double> %j, <4 x i64> %mask1)
283 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
284 %cmp_res = fcmp ogt <4 x double> %i, %j
285 %max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
286 %r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst
290 ; CHECK-LABEL: test_mask_vsubpd_256
291 ; CHECK: vsubpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
293 define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i,
294 <4 x double> %j, <4 x i64> %mask1)
296 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
297 %x = fsub <4 x double> %i, %j
298 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
302 ; CHECK-LABEL: test_mask_vdivpd_256
303 ; CHECK: vdivpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
305 define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i,
306 <4 x double> %j, <4 x i64> %mask1)
308 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
309 %x = fdiv <4 x double> %i, %j
310 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
314 ; CHECK-LABEL: test_mask_vaddpd_256
315 ; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
317 define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i,
318 <4 x double> %j, <4 x i64> %mask1)
320 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
321 %x = fadd <4 x double> %i, %j
322 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
326 ; CHECK-LABEL: test_maskz_vaddpd_256
327 ; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}}}
329 define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j,
330 <4 x i64> %mask1) nounwind readnone {
331 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
332 %x = fadd <4 x double> %i, %j
333 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
337 ; CHECK-LABEL: test_mask_fold_vaddpd_256
338 ; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}.*}}
340 define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i,
341 <4 x double>* %j, <4 x i64> %mask1)
343 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
344 %tmp = load <4 x double>, <4 x double>* %j
345 %x = fadd <4 x double> %i, %tmp
346 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
350 ; CHECK-LABEL: test_maskz_fold_vaddpd_256
351 ; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}.*}}
353 define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j,
354 <4 x i64> %mask1) nounwind {
355 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
356 %tmp = load <4 x double>, <4 x double>* %j
357 %x = fadd <4 x double> %i, %tmp
358 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
362 ; CHECK-LABEL: test_broadcast2_vaddpd_256
363 ; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*}}
365 define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
366 %tmp = load double, double* %j
367 %b = insertelement <4 x double> undef, double %tmp, i32 0
368 %c = shufflevector <4 x double> %b, <4 x double> undef,
369 <4 x i32> zeroinitializer
370 %x = fadd <4 x double> %c, %i
374 ; CHECK-LABEL: test_mask_broadcast_vaddpd_256
375 ; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]}.*}}
377 define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i,
378 double* %j, <4 x i64> %mask1) nounwind {
379 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
380 %tmp = load double, double* %j
381 %b = insertelement <4 x double> undef, double %tmp, i32 0
382 %c = shufflevector <4 x double> %b, <4 x double> undef,
383 <4 x i32> zeroinitializer
384 %x = fadd <4 x double> %c, %i
385 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i
389 ; CHECK-LABEL: test_maskz_broadcast_vaddpd_256
390 ; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]} {z}.*}}
392 define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j,
393 <4 x i64> %mask1) nounwind {
394 %mask = icmp ne <4 x i64> %mask1, zeroinitializer
395 %tmp = load double, double* %j
396 %b = insertelement <4 x double> undef, double %tmp, i32 0
397 %c = shufflevector <4 x double> %b, <4 x double> undef,
398 <4 x i32> zeroinitializer
399 %x = fadd <4 x double> %c, %i
400 %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
406 ; CHECK-LABEL: vpaddq128_test
407 ; CHECK: vpaddq %xmm{{.*}}
409 define <2 x i64> @vpaddq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
410 %x = add <2 x i64> %i, %j
414 ; CHECK-LABEL: vpaddq128_fold_test
415 ; CHECK: vpaddq (%rdi), %xmm{{.*}}
417 define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, <2 x i64>* %j) nounwind {
418 %tmp = load <2 x i64>, <2 x i64>* %j, align 4
419 %x = add <2 x i64> %i, %tmp
423 ; CHECK-LABEL: vpaddq128_broadcast2_test
424 ; CHECK: vpaddq (%rdi){1to2}, %xmm{{.*}}
426 define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, i64* %j) nounwind {
427 %tmp = load i64, i64* %j
428 %j.0 = insertelement <2 x i64> undef, i64 %tmp, i32 0
429 %j.1 = insertelement <2 x i64> %j.0, i64 %tmp, i32 1
430 %x = add <2 x i64> %i, %j.1
434 ; CHECK-LABEL: vpaddd128_test
435 ; CHECK: vpaddd %xmm{{.*}}
437 define <4 x i32> @vpaddd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
438 %x = add <4 x i32> %i, %j
442 ; CHECK-LABEL: vpaddd128_fold_test
443 ; CHECK: vpaddd (%rdi), %xmm{{.*}}
445 define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, <4 x i32>* %j) nounwind {
446 %tmp = load <4 x i32>, <4 x i32>* %j, align 4
447 %x = add <4 x i32> %i, %tmp
451 ; CHECK-LABEL: vpaddd128_broadcast_test
452 ; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*}}
454 define <4 x i32> @vpaddd128_broadcast_test(<4 x i32> %i) nounwind {
455 %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
459 ; CHECK-LABEL: vpaddd128_mask_test
460 ; CHECK: vpaddd %xmm{{.*%k[1-7].*}}
462 define <4 x i32> @vpaddd128_mask_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
463 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
464 %x = add <4 x i32> %i, %j
465 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
469 ; CHECK-LABEL: vpaddd128_maskz_test
470 ; CHECK: vpaddd %xmm{{.*{%k[1-7]} {z}.*}}
472 define <4 x i32> @vpaddd128_maskz_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
473 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
474 %x = add <4 x i32> %i, %j
475 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
479 ; CHECK-LABEL: vpaddd128_mask_fold_test
480 ; CHECK: vpaddd (%rdi), %xmm{{.*%k[1-7]}}
482 define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
483 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
484 %j = load <4 x i32>, <4 x i32>* %j.ptr
485 %x = add <4 x i32> %i, %j
486 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
490 ; CHECK-LABEL: vpaddd128_mask_broadcast_test
491 ; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]}}}
493 define <4 x i32> @vpaddd128_mask_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
494 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
495 %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
496 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
500 ; CHECK-LABEL: vpaddd128_maskz_fold_test
501 ; CHECK: vpaddd (%rdi), %xmm{{.*{%k[1-7]} {z}}}
503 define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
504 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
505 %j = load <4 x i32>, <4 x i32>* %j.ptr
506 %x = add <4 x i32> %i, %j
507 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
511 ; CHECK-LABEL: vpaddd128_maskz_broadcast_test
512 ; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]} {z}}}
514 define <4 x i32> @vpaddd128_maskz_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
515 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
516 %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
517 %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
521 ; CHECK-LABEL: vpsubq128_test
522 ; CHECK: vpsubq %xmm{{.*}}
524 define <2 x i64> @vpsubq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
525 %x = sub <2 x i64> %i, %j
529 ; CHECK-LABEL: vpsubd128_test
530 ; CHECK: vpsubd %xmm{{.*}}
532 define <4 x i32> @vpsubd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
533 %x = sub <4 x i32> %i, %j
537 ; CHECK-LABEL: vpmulld128_test
538 ; CHECK: vpmulld %xmm{{.*}}
540 define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) {
541 %x = mul <4 x i32> %i, %j
545 ; CHECK-LABEL: test_vaddpd_128
546 ; CHECK: vaddpd{{.*}}
548 define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
550 %add.i = fadd <2 x double> %x, %y
551 ret <2 x double> %add.i
554 ; CHECK-LABEL: test_fold_vaddpd_128
555 ; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
557 define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
559 %add.i = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
560 ret <2 x double> %add.i
563 ; CHECK-LABEL: test_broadcast_vaddpd_128
564 ; CHECK: vaddps LCP{{.*}}(%rip){1to4}, %xmm0, %xmm0
566 define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
567 %b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
571 ; CHECK-LABEL: test_mask_vaddps_128
572 ; CHECK: vaddps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
574 define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i,
575 <4 x float> %j, <4 x i32> %mask1)
577 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
578 %x = fadd <4 x float> %i, %j
579 %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
583 ; CHECK-LABEL: test_mask_vmulps_128
584 ; CHECK: vmulps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
586 define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i,
587 <4 x float> %j, <4 x i32> %mask1)
589 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
590 %x = fmul <4 x float> %i, %j
591 %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
595 ; CHECK-LABEL: test_mask_vminps_128
596 ; CHECK: vminps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
598 define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i,
599 <4 x float> %j, <4 x i32> %mask1)
601 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
602 %cmp_res = fcmp olt <4 x float> %i, %j
603 %min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
604 %r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst
608 ; CHECK-LABEL: test_mask_vmaxps_128
609 ; CHECK: vmaxps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
611 define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i,
612 <4 x float> %j, <4 x i32> %mask1)
614 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
615 %cmp_res = fcmp ogt <4 x float> %i, %j
616 %max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
617 %r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst
621 ; CHECK-LABEL: test_mask_vsubps_128
622 ; CHECK: vsubps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
624 define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i,
625 <4 x float> %j, <4 x i32> %mask1)
627 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
628 %x = fsub <4 x float> %i, %j
629 %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
634 ; CHECK-LABEL: test_mask_vdivps_128
635 ; CHECK: vdivps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
637 define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i,
638 <4 x float> %j, <4 x i32> %mask1)
640 %mask = icmp ne <4 x i32> %mask1, zeroinitializer
641 %x = fdiv <4 x float> %i, %j
642 %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
646 ; CHECK-LABEL: test_mask_vmulpd_128
647 ; CHECK: vmulpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
649 define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i,
650 <2 x double> %j, <2 x i64> %mask1)
652 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
653 %x = fmul <2 x double> %i, %j
654 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
658 ; CHECK-LABEL: test_mask_vminpd_128
659 ; CHECK: vminpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
661 define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i,
662 <2 x double> %j, <2 x i64> %mask1)
664 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
665 %cmp_res = fcmp olt <2 x double> %i, %j
666 %min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
667 %r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst
671 ; CHECK-LABEL: test_mask_vmaxpd_128
672 ; CHECK: vmaxpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
674 define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i,
675 <2 x double> %j, <2 x i64> %mask1)
677 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
678 %cmp_res = fcmp ogt <2 x double> %i, %j
679 %max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
680 %r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst
684 ; CHECK-LABEL: test_mask_vsubpd_128
685 ; CHECK: vsubpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
687 define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i,
688 <2 x double> %j, <2 x i64> %mask1)
690 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
691 %x = fsub <2 x double> %i, %j
692 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
696 ; CHECK-LABEL: test_mask_vdivpd_128
697 ; CHECK: vdivpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
699 define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i,
700 <2 x double> %j, <2 x i64> %mask1)
702 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
703 %x = fdiv <2 x double> %i, %j
704 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
708 ; CHECK-LABEL: test_mask_vaddpd_128
709 ; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
711 define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i,
712 <2 x double> %j, <2 x i64> %mask1)
714 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
715 %x = fadd <2 x double> %i, %j
716 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
720 ; CHECK-LABEL: test_maskz_vaddpd_128
721 ; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}}}
723 define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j,
724 <2 x i64> %mask1) nounwind readnone {
725 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
726 %x = fadd <2 x double> %i, %j
727 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
731 ; CHECK-LABEL: test_mask_fold_vaddpd_128
732 ; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}.*}}
734 define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i,
735 <2 x double>* %j, <2 x i64> %mask1)
737 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
738 %tmp = load <2 x double>, <2 x double>* %j
739 %x = fadd <2 x double> %i, %tmp
740 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
744 ; CHECK-LABEL: test_maskz_fold_vaddpd_128
745 ; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}.*}}
747 define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j,
748 <2 x i64> %mask1) nounwind {
749 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
750 %tmp = load <2 x double>, <2 x double>* %j
751 %x = fadd <2 x double> %i, %tmp
752 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
756 ; CHECK-LABEL: test_broadcast2_vaddpd_128
757 ; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*}}
759 define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind {
760 %tmp = load double, double* %j
761 %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
762 %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
763 %x = fadd <2 x double> %j.1, %i
767 ; CHECK-LABEL: test_mask_broadcast_vaddpd_128
768 ; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]}.*}}
770 define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i,
771 double* %j, <2 x i64> %mask1)
773 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
774 %tmp = load double, double* %j
775 %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
776 %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
777 %x = fadd <2 x double> %j.1, %i
778 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i
782 ; CHECK-LABEL: test_maskz_broadcast_vaddpd_128
783 ; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]} {z}.*}}
785 define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j,
786 <2 x i64> %mask1) nounwind {
787 %mask = icmp ne <2 x i64> %mask1, zeroinitializer
788 %tmp = load double, double* %j
789 %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
790 %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
791 %x = fadd <2 x double> %j.1, %i
792 %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer