1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512
8 ; Patterns (+ fneg variants): add(mul(x,y),z), sub(mul(x,y),z)
; (a0 * a1) + a2 on <4 x float>. The checks expect the fmul/fadd pair to be
; selected as vfmadd213ps (FMA3 and AVX-512 runs) or vfmaddps (FMA4 run).
11 define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
12 ; FMA-LABEL: test_x86_fmadd_ps:
14 ; FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
17 ; FMA4-LABEL: test_x86_fmadd_ps:
19 ; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
22 ; AVX512-LABEL: test_x86_fmadd_ps:
24 ; AVX512-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
26 %x = fmul <4 x float> %a0, %a1
27 %res = fadd <4 x float> %x, %a2
; (a0 * a1) - a2 on <4 x float>. The checks expect vfmsub213ps (FMA3 and
; AVX-512 runs) or vfmsubps (FMA4 run).
31 define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
32 ; FMA-LABEL: test_x86_fmsub_ps:
34 ; FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
37 ; FMA4-LABEL: test_x86_fmsub_ps:
39 ; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
42 ; AVX512-LABEL: test_x86_fmsub_ps:
44 ; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
46 %x = fmul <4 x float> %a0, %a1
47 %res = fsub <4 x float> %x, %a2
; a2 - (a0 * a1) on <4 x float>, i.e. a negated product plus an addend. The
; checks expect vfnmadd213ps (FMA3 and AVX-512 runs) or vfnmaddps (FMA4 run).
51 define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
52 ; FMA-LABEL: test_x86_fnmadd_ps:
54 ; FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
57 ; FMA4-LABEL: test_x86_fnmadd_ps:
59 ; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
62 ; AVX512-LABEL: test_x86_fnmadd_ps:
64 ; AVX512-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
66 %x = fmul <4 x float> %a0, %a1
67 %res = fsub <4 x float> %a2, %x
; (-(a0 * a1)) - a2 on <4 x float>; the negation is written as an fsub from a
; -0.0 splat. The checks expect vfnmsub213ps (FMA3 and AVX-512 runs) or
; vfnmsubps (FMA4 run).
71 define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
72 ; FMA-LABEL: test_x86_fnmsub_ps:
74 ; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
77 ; FMA4-LABEL: test_x86_fnmsub_ps:
79 ; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
82 ; AVX512-LABEL: test_x86_fnmsub_ps:
84 ; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
86 %x = fmul <4 x float> %a0, %a1
87 %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
88 %res = fsub <4 x float> %y, %a2
; 256-bit variant: (a0 * a1) + a2 on <8 x float>. The checks expect the ymm
; forms of vfmadd213ps (FMA3 and AVX-512 runs) or vfmaddps (FMA4 run).
92 define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
93 ; FMA-LABEL: test_x86_fmadd_ps_y:
95 ; FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
98 ; FMA4-LABEL: test_x86_fmadd_ps_y:
100 ; FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
103 ; AVX512-LABEL: test_x86_fmadd_ps_y:
105 ; AVX512-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
107 %x = fmul <8 x float> %a0, %a1
108 %res = fadd <8 x float> %x, %a2
; 256-bit variant: (a0 * a1) - a2 on <8 x float>. The checks expect the ymm
; forms of vfmsub213ps (FMA3 and AVX-512 runs) or vfmsubps (FMA4 run).
112 define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
113 ; FMA-LABEL: test_x86_fmsub_ps_y:
115 ; FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
118 ; FMA4-LABEL: test_x86_fmsub_ps_y:
120 ; FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
123 ; AVX512-LABEL: test_x86_fmsub_ps_y:
125 ; AVX512-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
127 %x = fmul <8 x float> %a0, %a1
128 %res = fsub <8 x float> %x, %a2
; 256-bit variant: a2 - (a0 * a1) on <8 x float>. The checks expect the ymm
; forms of vfnmadd213ps (FMA3 and AVX-512 runs) or vfnmaddps (FMA4 run).
132 define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
133 ; FMA-LABEL: test_x86_fnmadd_ps_y:
135 ; FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
138 ; FMA4-LABEL: test_x86_fnmadd_ps_y:
140 ; FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
143 ; AVX512-LABEL: test_x86_fnmadd_ps_y:
145 ; AVX512-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
147 %x = fmul <8 x float> %a0, %a1
148 %res = fsub <8 x float> %a2, %x
; 256-bit variant: (-(a0 * a1)) - a2 on <8 x float>, with the negation written
; as an fsub from a -0.0 splat. The checks expect the ymm forms of
; vfnmsub213ps (FMA3 and AVX-512 runs) or vfnmsubps (FMA4 run).
152 define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
153 ; FMA-LABEL: test_x86_fnmsub_ps_y:
155 ; FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
158 ; FMA4-LABEL: test_x86_fnmsub_ps_y:
160 ; FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
163 ; AVX512-LABEL: test_x86_fnmsub_ps_y:
165 ; AVX512-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
167 %x = fmul <8 x float> %a0, %a1
168 %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
169 %res = fsub <8 x float> %y, %a2
; Double-precision 256-bit variant: (a0 * a1) + a2 on <4 x double>. The checks
; expect vfmadd213pd (FMA3 and AVX-512 runs) or vfmaddpd (FMA4 run) on ymm.
173 define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
174 ; FMA-LABEL: test_x86_fmadd_pd_y:
176 ; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
179 ; FMA4-LABEL: test_x86_fmadd_pd_y:
181 ; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
184 ; AVX512-LABEL: test_x86_fmadd_pd_y:
186 ; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
188 %x = fmul <4 x double> %a0, %a1
189 %res = fadd <4 x double> %x, %a2
190 ret <4 x double> %res
; Double-precision 256-bit variant: (a0 * a1) - a2 on <4 x double>. The checks
; expect vfmsub213pd (FMA3 and AVX-512 runs) or vfmsubpd (FMA4 run) on ymm.
193 define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
194 ; FMA-LABEL: test_x86_fmsub_pd_y:
196 ; FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
199 ; FMA4-LABEL: test_x86_fmsub_pd_y:
201 ; FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
204 ; AVX512-LABEL: test_x86_fmsub_pd_y:
206 ; AVX512-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
208 %x = fmul <4 x double> %a0, %a1
209 %res = fsub <4 x double> %x, %a2
210 ret <4 x double> %res
; Double-precision 128-bit variant: (a0 * a1) - a2 on <2 x double>. The checks
; expect vfmsub213pd (FMA3 and AVX-512 runs) or vfmsubpd (FMA4 run) on xmm.
213 define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
214 ; FMA-LABEL: test_x86_fmsub_pd:
216 ; FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
219 ; FMA4-LABEL: test_x86_fmsub_pd:
221 ; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
224 ; AVX512-LABEL: test_x86_fmsub_pd:
226 ; AVX512-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
228 %x = fmul <2 x double> %a0, %a1
229 %res = fsub <2 x double> %x, %a2
230 ret <2 x double> %res
; Scalar float: a2 - (a0 * a1). The checks expect vfnmadd213ss (FMA3 run) or
; vfnmaddss (FMA4 run). The AVX-512 expectations place the vfnmadd213ss
; result in xmm1 and then copy zmm1 into zmm0 with vmovaps.
233 define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) {
234 ; FMA-LABEL: test_x86_fnmadd_ss:
236 ; FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
239 ; FMA4-LABEL: test_x86_fnmadd_ss:
241 ; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
244 ; AVX512-LABEL: test_x86_fnmadd_ss:
246 ; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1
247 ; AVX512-NEXT: vmovaps %zmm1, %zmm0
249 %x = fmul float %a0, %a1
250 %res = fsub float %a2, %x
; Scalar double: a2 - (a0 * a1). The checks expect vfnmadd213sd (FMA3 run) or
; vfnmaddsd (FMA4 run). The AVX-512 expectations place the vfnmadd213sd
; result in xmm1 and then copy zmm1 into zmm0 with vmovaps.
254 define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) {
255 ; FMA-LABEL: test_x86_fnmadd_sd:
257 ; FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
260 ; FMA4-LABEL: test_x86_fnmadd_sd:
262 ; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
265 ; AVX512-LABEL: test_x86_fnmadd_sd:
267 ; AVX512-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1
268 ; AVX512-NEXT: vmovaps %zmm1, %zmm0
270 %x = fmul double %a0, %a1
271 %res = fsub double %a2, %x
; Scalar double: (a0 * a1) - a2. The checks expect vfmsub213sd (FMA3 run) or
; vfmsubsd (FMA4 run). The AVX-512 expectations place the vfmsub213sd result
; in xmm1 and then copy zmm1 into zmm0 with vmovaps.
275 define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) {
276 ; FMA-LABEL: test_x86_fmsub_sd:
278 ; FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
281 ; FMA4-LABEL: test_x86_fmsub_sd:
283 ; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
286 ; AVX512-LABEL: test_x86_fmsub_sd:
288 ; AVX512-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
289 ; AVX512-NEXT: vmovaps %zmm1, %zmm0
291 %x = fmul double %a0, %a1
292 %res = fsub double %x, %a2
; Scalar float: ((-a0) * a1) - a2. Here the negation (fsub from -0.0) is
; applied to a multiplicand rather than to the product. The checks expect
; vfnmsub213ss (FMA3 run) or vfnmsubss (FMA4 run); the AVX-512 expectations
; add a vmovaps copying zmm1 into zmm0 after the vfnmsub213ss.
296 define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) {
297 ; FMA-LABEL: test_x86_fnmsub_ss:
299 ; FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
302 ; FMA4-LABEL: test_x86_fnmsub_ss:
304 ; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
307 ; AVX512-LABEL: test_x86_fnmsub_ss:
309 ; AVX512-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1
310 ; AVX512-NEXT: vmovaps %zmm1, %zmm0
312 %x = fsub float -0.000000e+00, %a0
313 %y = fmul float %x, %a1
314 %res = fsub float %y, %a2
; (load(a0) * a1) + a2 where one multiplicand comes from memory. The FMA3 and
; FMA4 expectations use (%rdi) directly as a memory operand of vfmadd132ps /
; vfmaddps. The AVX-512 expectations instead show a separate vmovaps load,
; a register-only vfmadd213ps, and a vmovaps copy of zmm2 into zmm0.
318 define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
319 ; FMA-LABEL: test_x86_fmadd_ps_load:
321 ; FMA-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0
324 ; FMA4-LABEL: test_x86_fmadd_ps_load:
326 ; FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
329 ; AVX512-LABEL: test_x86_fmadd_ps_load:
331 ; AVX512-NEXT: vmovaps (%rdi), %xmm2
332 ; AVX512-NEXT: vfmadd213ps %xmm1, %xmm0, %xmm2
333 ; AVX512-NEXT: vmovaps %zmm2, %zmm0
335 %x = load <4 x float>, <4 x float>* %a0
336 %y = fmul <4 x float> %x, %a1
337 %res = fadd <4 x float> %y, %a2
; (load(a0) * a1) - a2 where one multiplicand comes from memory. The FMA3 and
; FMA4 expectations use (%rdi) directly as a memory operand of vfmsub132ps /
; vfmsubps. The AVX-512 expectations instead show a separate vmovaps load,
; a register-only vfmsub213ps, and a vmovaps copy of zmm2 into zmm0.
341 define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
342 ; FMA-LABEL: test_x86_fmsub_ps_load:
344 ; FMA-NEXT: vfmsub132ps (%rdi), %xmm1, %xmm0
347 ; FMA4-LABEL: test_x86_fmsub_ps_load:
349 ; FMA4-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
352 ; AVX512-LABEL: test_x86_fmsub_ps_load:
354 ; AVX512-NEXT: vmovaps (%rdi), %xmm2
355 ; AVX512-NEXT: vfmsub213ps %xmm1, %xmm0, %xmm2
356 ; AVX512-NEXT: vmovaps %zmm2, %zmm0
358 %x = load <4 x float>, <4 x float>* %a0
359 %y = fmul <4 x float> %x, %a1
360 %res = fsub <4 x float> %y, %a2
365 ; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
; (x + 1.0) * y. The checks expect a fused multiply-add with xmm1 as both a
; multiplicand and the addend, i.e. the x*y + y form, with no separate add.
368 define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
369 ; FMA-LABEL: test_v4f32_mul_add_x_one_y:
371 ; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
374 ; FMA4-LABEL: test_v4f32_mul_add_x_one_y:
376 ; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
379 ; AVX512-LABEL: test_v4f32_mul_add_x_one_y:
381 ; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
383 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
384 %m = fmul <4 x float> %a, %y
; y * (x + 1.0): same pattern as test_v4f32_mul_add_x_one_y with the fmul
; operands commuted. The expected instructions are identical (x*y + y form).
388 define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
389 ; FMA-LABEL: test_v4f32_mul_y_add_x_one:
391 ; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
394 ; FMA4-LABEL: test_v4f32_mul_y_add_x_one:
396 ; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
399 ; AVX512-LABEL: test_v4f32_mul_y_add_x_one:
401 ; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
403 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
404 %m = fmul <4 x float> %y, %a
; (x + -1.0) * y. The checks expect a fused multiply-subtract with xmm1 as
; both a multiplicand and the subtrahend, i.e. the x*y - y form.
408 define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
409 ; FMA-LABEL: test_v4f32_mul_add_x_negone_y:
411 ; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
414 ; FMA4-LABEL: test_v4f32_mul_add_x_negone_y:
416 ; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
419 ; AVX512-LABEL: test_v4f32_mul_add_x_negone_y:
421 ; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
423 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
424 %m = fmul <4 x float> %a, %y
; y * (x + -1.0): commuted-fmul version of test_v4f32_mul_add_x_negone_y.
; The expected instructions are identical (x*y - y form).
428 define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
429 ; FMA-LABEL: test_v4f32_mul_y_add_x_negone:
431 ; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
434 ; FMA4-LABEL: test_v4f32_mul_y_add_x_negone:
436 ; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
439 ; AVX512-LABEL: test_v4f32_mul_y_add_x_negone:
441 ; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
443 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
444 %m = fmul <4 x float> %y, %a
; (1.0 - x) * y. The checks expect a negated fused multiply-add with xmm1 as
; both a multiplicand and the addend, i.e. the -(x*y) + y form.
448 define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
449 ; FMA-LABEL: test_v4f32_mul_sub_one_x_y:
451 ; FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
454 ; FMA4-LABEL: test_v4f32_mul_sub_one_x_y:
456 ; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
459 ; AVX512-LABEL: test_v4f32_mul_sub_one_x_y:
461 ; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
463 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
464 %m = fmul <4 x float> %s, %y
; y * (1.0 - x): commuted-fmul version of test_v4f32_mul_sub_one_x_y.
; The expected instructions are identical (-(x*y) + y form).
468 define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
469 ; FMA-LABEL: test_v4f32_mul_y_sub_one_x:
471 ; FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
474 ; FMA4-LABEL: test_v4f32_mul_y_sub_one_x:
476 ; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
479 ; AVX512-LABEL: test_v4f32_mul_y_sub_one_x:
481 ; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
483 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
484 %m = fmul <4 x float> %y, %s
; (-1.0 - x) * y. The checks expect a negated fused multiply-subtract with
; xmm1 as both a multiplicand and the subtrahend, i.e. the -(x*y) - y form.
488 define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
489 ; FMA-LABEL: test_v4f32_mul_sub_negone_x_y:
491 ; FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
494 ; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y:
496 ; FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
499 ; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y:
501 ; AVX512-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
503 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
504 %m = fmul <4 x float> %s, %y
; y * (-1.0 - x): commuted-fmul version of test_v4f32_mul_sub_negone_x_y.
; The expected instructions are identical (-(x*y) - y form).
508 define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
509 ; FMA-LABEL: test_v4f32_mul_y_sub_negone_x:
511 ; FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
514 ; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x:
516 ; FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
519 ; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x:
521 ; AVX512-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
523 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
524 %m = fmul <4 x float> %y, %s
; (x - 1.0) * y. The checks expect a fused multiply-subtract with xmm1 as
; both a multiplicand and the subtrahend, i.e. the x*y - y form.
528 define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
529 ; FMA-LABEL: test_v4f32_mul_sub_x_one_y:
531 ; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
534 ; FMA4-LABEL: test_v4f32_mul_sub_x_one_y:
536 ; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
539 ; AVX512-LABEL: test_v4f32_mul_sub_x_one_y:
541 ; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
543 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
544 %m = fmul <4 x float> %s, %y
; y * (x - 1.0): commuted-fmul version of test_v4f32_mul_sub_x_one_y.
; The expected instructions are identical (x*y - y form).
548 define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
549 ; FMA-LABEL: test_v4f32_mul_y_sub_x_one:
551 ; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
554 ; FMA4-LABEL: test_v4f32_mul_y_sub_x_one:
556 ; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
559 ; AVX512-LABEL: test_v4f32_mul_y_sub_x_one:
561 ; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
563 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
564 %m = fmul <4 x float> %y, %s
; (x - (-1.0)) * y. The checks expect a fused multiply-add with xmm1 as both
; a multiplicand and the addend, i.e. the x*y + y form.
568 define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
569 ; FMA-LABEL: test_v4f32_mul_sub_x_negone_y:
571 ; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
574 ; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y:
576 ; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
579 ; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y:
581 ; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
583 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
584 %m = fmul <4 x float> %s, %y
; y * (x - (-1.0)): commuted-fmul version of test_v4f32_mul_sub_x_negone_y.
; The expected instructions are identical (x*y + y form).
588 define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
589 ; FMA-LABEL: test_v4f32_mul_y_sub_x_negone:
591 ; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
594 ; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone:
596 ; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
599 ; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone:
601 ; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
603 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
604 %m = fmul <4 x float> %y, %s
609 ; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
; Scalar float linear interpolation: x*t + y*(1.0 - t). The checks expect two
; fused ops and no standalone subtract or multiply: a vfnmadd producing
; y - t*y, then a vfmadd adding x*t to it. The AVX-512 expectations leave the
; result in xmm2 and copy zmm2 into zmm0 with vmovaps.
612 define float @test_f32_interp(float %x, float %y, float %t) {
613 ; FMA-LABEL: test_f32_interp:
615 ; FMA-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1
616 ; FMA-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0
619 ; FMA4-LABEL: test_f32_interp:
621 ; FMA4-NEXT: vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
622 ; FMA4-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
625 ; AVX512-LABEL: test_f32_interp:
627 ; AVX512-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1
628 ; AVX512-NEXT: vfmadd213ss %xmm1, %xmm0, %xmm2
629 ; AVX512-NEXT: vmovaps %zmm2, %zmm0
631 %t1 = fsub float 1.0, %t
632 %tx = fmul float %x, %t
633 %ty = fmul float %y, %t1
634 %r = fadd float %tx, %ty
; <4 x float> linear interpolation: x*t + y*(1.0 - t). The checks expect a
; vfnmadd producing y - t*y followed by a vfmadd adding x*t. The AVX-512
; expectations first copy t (zmm2) into zmm3 and build the vfnmadd result
; there, so the final vfmadd can write xmm0 directly.
638 define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
639 ; FMA-LABEL: test_v4f32_interp:
641 ; FMA-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1
642 ; FMA-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0
645 ; FMA4-LABEL: test_v4f32_interp:
647 ; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
648 ; FMA4-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
651 ; AVX512-LABEL: test_v4f32_interp:
653 ; AVX512-NEXT: vmovaps %zmm2, %zmm3
654 ; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm3
655 ; AVX512-NEXT: vfmadd213ps %xmm3, %xmm2, %xmm0
657 %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
658 %tx = fmul <4 x float> %x, %t
659 %ty = fmul <4 x float> %y, %t1
660 %r = fadd <4 x float> %tx, %ty
; <8 x float> linear interpolation: x*t + y*(1.0 - t). The checks expect a
; ymm vfnmadd producing y - t*y followed by a ymm vfmadd adding x*t. The
; AVX-512 expectations first copy t (zmm2) into zmm3 and build the vfnmadd
; result there.
664 define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
665 ; FMA-LABEL: test_v8f32_interp:
667 ; FMA-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1
668 ; FMA-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0
671 ; FMA4-LABEL: test_v8f32_interp:
673 ; FMA4-NEXT: vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
674 ; FMA4-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
677 ; AVX512-LABEL: test_v8f32_interp:
679 ; AVX512-NEXT: vmovaps %zmm2, %zmm3
680 ; AVX512-NEXT: vfnmadd213ps %ymm1, %ymm1, %ymm3
681 ; AVX512-NEXT: vfmadd213ps %ymm3, %ymm2, %ymm0
683 %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
684 %tx = fmul <8 x float> %x, %t
685 %ty = fmul <8 x float> %y, %t1
686 %r = fadd <8 x float> %tx, %ty
; Scalar double linear interpolation: x*t + y*(1.0 - t). The checks expect a
; vfnmadd producing y - t*y followed by a vfmadd adding x*t. The AVX-512
; expectations leave the result in xmm2 and copy zmm2 into zmm0 with vmovaps.
690 define double @test_f64_interp(double %x, double %y, double %t) {
691 ; FMA-LABEL: test_f64_interp:
693 ; FMA-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1
694 ; FMA-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0
697 ; FMA4-LABEL: test_f64_interp:
699 ; FMA4-NEXT: vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
700 ; FMA4-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
703 ; AVX512-LABEL: test_f64_interp:
705 ; AVX512-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1
706 ; AVX512-NEXT: vfmadd213sd %xmm1, %xmm0, %xmm2
707 ; AVX512-NEXT: vmovaps %zmm2, %zmm0
709 %t1 = fsub double 1.0, %t
710 %tx = fmul double %x, %t
711 %ty = fmul double %y, %t1
712 %r = fadd double %tx, %ty
; <2 x double> linear interpolation: x*t + y*(1.0 - t). The checks expect a
; vfnmadd producing y - t*y followed by a vfmadd adding x*t. The AVX-512
; expectations first copy t (zmm2) into zmm3 and build the vfnmadd result
; there.
716 define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
717 ; FMA-LABEL: test_v2f64_interp:
719 ; FMA-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1
720 ; FMA-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0
723 ; FMA4-LABEL: test_v2f64_interp:
725 ; FMA4-NEXT: vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
726 ; FMA4-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
729 ; AVX512-LABEL: test_v2f64_interp:
731 ; AVX512-NEXT: vmovaps %zmm2, %zmm3
732 ; AVX512-NEXT: vfnmadd213pd %xmm1, %xmm1, %xmm3
733 ; AVX512-NEXT: vfmadd213pd %xmm3, %xmm2, %xmm0
735 %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
736 %tx = fmul <2 x double> %x, %t
737 %ty = fmul <2 x double> %y, %t1
738 %r = fadd <2 x double> %tx, %ty
; <4 x double> linear interpolation: x*t + y*(1.0 - t). The checks expect a
; ymm vfnmadd producing y - t*y followed by a ymm vfmadd adding x*t. The
; AVX-512 expectations first copy t (zmm2) into zmm3 and build the vfnmadd
; result there.
742 define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
743 ; FMA-LABEL: test_v4f64_interp:
745 ; FMA-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1
746 ; FMA-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0
749 ; FMA4-LABEL: test_v4f64_interp:
751 ; FMA4-NEXT: vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
752 ; FMA4-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
755 ; AVX512-LABEL: test_v4f64_interp:
757 ; AVX512-NEXT: vmovaps %zmm2, %zmm3
758 ; AVX512-NEXT: vfnmadd213pd %ymm1, %ymm1, %ymm3
759 ; AVX512-NEXT: vfmadd213pd %ymm3, %ymm2, %ymm0
761 %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
762 %tx = fmul <4 x double> %x, %t
763 %ty = fmul <4 x double> %y, %t1
764 %r = fadd <4 x double> %tx, %ty
768 ; (fneg (fma x, y, z)) -> (fma x, -y, -z)
; -((a0 * a1) + a2), with the outer negation written as an fsub from a -0.0
; splat. The checks expect the negation to be absorbed into the fused op,
; giving vfnmsub213ps / vfnmsubps instead of a vfmadd plus a sign flip.
; Uses attribute group #0.
770 define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
771 ; FMA-LABEL: test_v4f32_fneg_fmadd:
773 ; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
776 ; FMA4-LABEL: test_v4f32_fneg_fmadd:
778 ; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
781 ; AVX512-LABEL: test_v4f32_fneg_fmadd:
783 ; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
785 %mul = fmul <4 x float> %a0, %a1
786 %add = fadd <4 x float> %mul, %a2
787 %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
; -((a0 * a1) - a2), with the outer negation written as an fsub from a -0.0
; splat. The checks expect the negation to be absorbed into the fused op,
; giving vfnmadd213pd / vfnmaddpd (i.e. -(a0*a1) + a2). Uses attribute
; group #0.
791 define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
792 ; FMA-LABEL: test_v4f64_fneg_fmsub:
794 ; FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
797 ; FMA4-LABEL: test_v4f64_fneg_fmsub:
799 ; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
802 ; AVX512-LABEL: test_v4f64_fneg_fmsub:
804 ; AVX512-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
806 %mul = fmul <4 x double> %a0, %a1
807 %sub = fsub <4 x double> %mul, %a2
808 %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
809 ret <4 x double> %neg
; -((-(a0 * a1)) + a2), with both negations written as fsub from a -0.0
; splat. The checks expect the two negations to fold into a plain
; vfmsub213ps / vfmsubps (i.e. a0*a1 - a2). Uses attribute group #0.
812 define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
813 ; FMA-LABEL: test_v4f32_fneg_fnmadd:
815 ; FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
818 ; FMA4-LABEL: test_v4f32_fneg_fnmadd:
820 ; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
823 ; AVX512-LABEL: test_v4f32_fneg_fnmadd:
825 ; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
827 %mul = fmul <4 x float> %a0, %a1
828 %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
829 %add = fadd <4 x float> %neg0, %a2
830 %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
831 ret <4 x float> %neg1
; -((-(a0 * a1)) - a2), with both negations written as fsub from a -0.0
; splat. The checks expect the two negations to fold into a plain
; vfmadd213pd / vfmaddpd (i.e. a0*a1 + a2). Uses attribute group #0.
834 define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
835 ; FMA-LABEL: test_v4f64_fneg_fnmsub:
837 ; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
840 ; FMA4-LABEL: test_v4f64_fneg_fnmsub:
842 ; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
845 ; AVX512-LABEL: test_v4f64_fneg_fnmsub:
847 ; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
849 %mul = fmul <4 x double> %a0, %a1
850 %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
851 %sub = fsub <4 x double> %neg0, %a2
852 %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
853 ret <4 x double> %neg1
856 ; (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
; x*<1,2,3,4> + x*<4,3,2,1>. The checks expect the two constant multiplies to
; collapse into one vmulps by a RIP-relative constant and no fused op at all.
; The AVX-512 expectation uses a {1to4} broadcast memory operand. Uses
; attribute group #0.
858 define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
859 ; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
861 ; FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
864 ; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
866 ; FMA4-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
869 ; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
871 ; AVX512-NEXT: vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0
873 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
874 %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
875 %a = fadd <4 x float> %m0, %m1
879 ; (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
; ((x * c1) * c2) + y with constant vectors c1 and c2. The checks expect one
; fused multiply-add whose multiplier is a single RIP-relative constant
; operand, with no separate vmulps. The AVX-512 expectation accumulates into
; xmm1 via vfmadd231ps and then copies zmm1 into zmm0. Uses attribute group #0.
881 define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 {
882 ; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
884 ; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
887 ; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
889 ; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
892 ; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
894 ; AVX512-NEXT: vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1
895 ; AVX512-NEXT: vmovaps %zmm1, %zmm0
897 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
898 %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
899 %a = fadd <4 x float> %m1, %y
; Attribute group #0: sets "unsafe-fp-math"="true". It is referenced by the
; fneg-of-fma tests and the two constant-combining tests in this file.
903 attributes #0 = { "unsafe-fp-math"="true" }