; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=AVX512DQ %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq -mattr=+avx512bw -mattr=+avx512vl| FileCheck --check-prefix=CHECK --check-prefix=SKX %s
; 512-bit fadd: operands commuted by isel, so the check expects %zmm1 as src1.
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}
; fadd with a vector constant must fold the constant as a RIP-relative memory operand.
define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}
; 512-bit single-precision fadd.
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}
; Constant-vector fadd folded to a RIP-relative load (constant is non-splat, so no broadcast).
define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}
; 512-bit fsub (non-commutative: %x - %y).
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}
; fsub with a loaded operand must fold the load into vsubpd.
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}
; 512-bit single-precision fsub.
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}
; fsub with a loaded operand must fold the load into vsubps.
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}
; <8 x i64> multiply: without AVX512DQ this lowers to the pmuludq/shift/add
; expansion; with AVX512DQ (and SKX) it is a single vpmullq.
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT: vpsllq $32, %zmm3, %zmm3
; AVX512F-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm1
; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT: vpsllq $32, %zmm0, %zmm0
; AVX512F-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
; AVX512VL-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT: vpsllq $32, %zmm3, %zmm3
; AVX512VL-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm1
; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT: vpsllq $32, %zmm0, %zmm0
; AVX512VL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT: vpsllq $32, %zmm3, %zmm3
; AVX512BW-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm1
; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpsllq $32, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: imulq512:
; SKX: ## BB#0:
; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}
; 512-bit fmul.
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}
; Constant-vector fmul folded to a RIP-relative load.
define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}
; 512-bit single-precision fmul.
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}
; Constant-vector fmul folded to a RIP-relative load.
define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}
; 512-bit fdiv (non-commutative: %x / %y).
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}
; Constant-vector fdiv folded to a RIP-relative load.
define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}
; 512-bit single-precision fdiv.
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}
; Constant-vector fdiv folded to a RIP-relative load.
define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}
; Integer <8 x i64> add.
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}
; Add with a loaded operand must fold the load into vpaddq.
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}
; Splat constant must be encoded as an embedded broadcast {1to8}.
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}
; A scalar load splatted via insertelements must become a {1to8} broadcast memory operand.
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}
; Integer <16 x i32> add.
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}
; Add with a loaded operand must fold the load into vpaddd.
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}
; Splat constant must be encoded as an embedded broadcast {1to16}.
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}
; add + select(mask, add, %i) must fold into a merge-masked vpaddd {%k1}.
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}
; add + select(mask, add, 0) must fold into a zero-masked vpaddd {%k1} {z}.
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
; Merge-masked vpaddd with a folded memory operand.
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}
; Merge-masked vpaddd with an embedded-broadcast constant operand.
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}
; Zero-masked vpaddd with a folded memory operand.
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
; Zero-masked vpaddd with an embedded-broadcast constant operand.
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
; Integer <8 x i64> sub.
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}
; Integer <16 x i32> sub.
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}
; Integer <16 x i32> mul lowers directly to vpmulld under AVX512F.
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}
403 declare float @sqrtf(float) readnone
; libm sqrtf call on a readnone path must lower to a single vsqrtss.
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}
414 declare double @sqrt(double) readnone
; libm sqrt call must lower to a single vsqrtsd.
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}
425 declare float @llvm.sqrt.f32(float)
; llvm.sqrt.f32 intrinsic lowers to vsqrtss.
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}
435 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
; Vector llvm.sqrt.v16f32 lowers to a full-width vsqrtps.
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}
445 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
; Vector llvm.sqrt.v8f64 lowers to a full-width vsqrtpd.
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}
; Splat FP constant encoded as an embedded broadcast {1to16}.
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}
; Splat i64 constant encoded as an embedded broadcast {1to8}.
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}
; Splat i64 OR constant encoded as an embedded broadcast {1to8}.
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}
; AND with a loaded operand must fold the load into vpandd.
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-LABEL: andd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}
; AND with a scalar load splatted via shufflevector must use a {1to8} broadcast operand.
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64> %d
}
; fadd + select(mask, x, dst) folds into merge-masked vaddps {%k1}.
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}
; fmul + select(mask, x, dst) folds into merge-masked vmulps {%k1}.
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}
; fcmp-olt/select (min pattern) + mask-select folds into merge-masked vminps {%k1}.
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}
; Masked vminpd with an <8 x i32> mask: with AVX512VL (and SKX) the i32 compare
; stays 256-bit (%ymm); without VL it is promoted to 512-bit (%zmm).
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX: ## BB#0:
; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
                                    <8 x double> %j, <8 x i32> %mask1)
                                    nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}
; fcmp-ogt/select (max pattern) + mask-select folds into merge-masked vmaxps {%k1}.
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}
; Masked vmaxpd with an <8 x i32> mask: VL/SKX compare in %ymm, others promote to %zmm.
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX: ## BB#0:
; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
                                    <8 x double> %j, <8 x i32> %mask1)
                                    nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}
; fsub + select(mask, x, dst) folds into merge-masked vsubps {%k1}.
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}
; fdiv + select(mask, x, dst) folds into merge-masked vdivps {%k1}.
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}
; fadd + select on an <8 x i64> mask uses vpcmpneqq and a merge-masked vaddpd.
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT: vpcmpneqq %zmm4, %zmm3, %k1
; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <8 x double> %j, <8 x i64> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}
; fadd + select(mask, x, 0) folds into a zero-masked vaddpd {%k1} {z}.
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
                                      <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}
; Merge-masked vaddpd with a folded memory operand.
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
                                     <8 x double>* %j, <8 x i64> %mask1)
                                     nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}
; Zero-masked vaddpd with a folded memory operand.
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
                                      <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}
; fadd with a scalar-load splat must use a (%rdi){1to8} broadcast operand.
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}
; Merge-masked vaddpd with a {1to8} broadcast source; result is moved back to %zmm0.
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: retq
                                      double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}
; Zero-masked vaddpd with a {1to8} broadcast source.
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
                                       <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}
796 define <16 x float> @test_fxor(<16 x float> %a) {
797 ; AVX512F-LABEL: test_fxor:
799 ; AVX512F-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
802 ; AVX512VL-LABEL: test_fxor:
804 ; AVX512VL-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
805 ; AVX512VL-NEXT: retq
807 ; AVX512BW-LABEL: test_fxor:
809 ; AVX512BW-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
810 ; AVX512BW-NEXT: retq
812 ; AVX512DQ-LABEL: test_fxor:
814 ; AVX512DQ-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
815 ; AVX512DQ-NEXT: retq
817 ; SKX-LABEL: test_fxor:
819 ; SKX-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
822 %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a