1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=SKX
; Contraction test on <16 x float>: a separate fmul and fadd computing a0*a1 + a2.
; The llc invocations at the top of the file pass -fp-contract=fast, and the
; expectation below is a single vfmadd213ps on zmm registers.
; NOTE(review): no ret or closing brace is visible for this function in this view - confirm.
4 ; CHECK-LABEL: test_x86_fmadd_ps_z
5 ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0
7 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; %x = a0 * a1
8 %x = fmul <16 x float> %a0, %a1
; %res = %x + a2
9 %res = fadd <16 x float> %x, %a2
; Contraction test on <16 x float>: fmul followed by fsub computing a0*a1 - a2.
; The expectation below is a single vfmsub213ps on zmm registers.
; NOTE(review): no ret or closing brace is visible for this function in this view - confirm.
13 ; CHECK-LABEL: test_x86_fmsub_ps_z
14 ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0
16 define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; %x = a0 * a1
17 %x = fmul <16 x float> %a0, %a1
; %res = %x - a2 (the product is the minuend)
18 %res = fsub <16 x float> %x, %a2
; Contraction test on <16 x float>: fmul followed by fsub computing a2 - a0*a1,
; i.e. the product is the subtrahend. The expectation below is a single
; vfnmadd213ps on zmm registers.
; NOTE(review): no ret or closing brace is visible for this function in this view - confirm.
22 ; CHECK-LABEL: test_x86_fnmadd_ps_z
23 ; CHECK: vfnmadd213ps %zmm2, %zmm1, %zmm0
25 define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; %x = a0 * a1
26 %x = fmul <16 x float> %a0, %a1
; %res = a2 - %x (operands swapped relative to the fmsub test)
27 %res = fsub <16 x float> %a2, %x
; Contraction test on <16 x float>: computes (-0.0 - a0*a1) - a2, i.e. the
; product is negated via a subtraction from a -0.0 vector and a2 is then
; subtracted. The expectation below is a single vfnmsub213ps on zmm registers.
; NOTE(review): no ret or closing brace is visible for this function in this view - confirm.
31 ; CHECK-LABEL: test_x86_fnmsub_ps_z
32 ; CHECK: vfnmsub213ps %zmm2, %zmm1, %zmm0
34 define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; %x = a0 * a1
35 %x = fmul <16 x float> %a0, %a1
; %y = (-0.0 in every lane) - %x; the constant spans the next four lines.
36 %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
37 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
38 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
39 float -0.000000e+00>, %x
; %res = %y - a2
40 %res = fsub <16 x float> %y, %a2
; Contraction test on <8 x double>: fmul followed by fadd computing a0*a1 + a2.
; The expectation below is a single vfmadd213pd on zmm registers.
; NOTE(review): no ret or closing brace is visible for this function in this view - confirm.
44 ; CHECK-LABEL: test_x86_fmadd_pd_z
45 ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0
47 define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; %x = a0 * a1
48 %x = fmul <8 x double> %a0, %a1
; %res = %x + a2
49 %res = fadd <8 x double> %x, %a2
; Contraction test on <8 x double>: fmul followed by fsub computing a0*a1 - a2.
; The expectation below is a single vfmsub213pd on zmm registers.
; NOTE(review): no ret or closing brace is visible for this function in this view - confirm.
53 ; CHECK-LABEL: test_x86_fmsub_pd_z
54 ; CHECK: vfmsub213pd %zmm2, %zmm1, %zmm0
56 define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; %x = a0 * a1
57 %x = fmul <8 x double> %a0, %a1
; %res = %x - a2
58 %res = fsub <8 x double> %x, %a2
; Scalar double contraction test: a0*a1 - a2 with all three operands passed by
; value. The expected output is vfmsub213sd on xmm registers followed by
; vmovaps %zmm1, %zmm0.
; NOTE(review): no ret or closing brace is visible for this function in this view - confirm.
62 define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
63 ; CHECK-LABEL: test_x86_fmsub_213:
65 ; CHECK-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
66 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
; %x = a0 * a1
68 %x = fmul double %a0, %a1
; %res = %x - a2
69 %res = fsub double %x, %a2
; Scalar double contraction test where the subtrahend a2 is loaded from
; %a2_ptr: computes a0*a1 - load(a2_ptr). The expected vfmsub213sd takes
; (%rdi) as a memory operand, followed by vmovaps %zmm1, %zmm0.
; NOTE(review): no ret or closing brace is visible for this function in this view - confirm.
73 define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
74 ; CHECK-LABEL: test_x86_fmsub_213_m:
76 ; CHECK-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1
77 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
; %a2 = value read through the pointer argument
79 %a2 = load double , double *%a2_ptr
; %x = a0 * a1
80 %x = fmul double %a0, %a1
; %res = %x - %a2 (the loaded value is the subtrahend)
81 %res = fsub double %x, %a2
; Scalar double contraction test where the loaded value is a multiplicand:
; computes a0*load(a2_ptr) - a1. The expected vfmsub231sd takes (%rdi) as a
; memory operand, followed by vmovaps %zmm1, %zmm0.
; NOTE(review): no ret or closing brace is visible for this function in this view - confirm.
85 define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
86 ; CHECK-LABEL: test_x86_fmsub_231_m:
88 ; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
89 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
; %a2 = value read through the pointer argument
91 %a2 = load double , double *%a2_ptr
; %x = a0 * %a2 (the loaded value is multiplied, unlike the 213_m test)
92 %x = fmul double %a0, %a2
; %res = %x - a1
93 %res = fsub double %x, %a1
; Broadcast test, constant as multiplicand: computes a1*C + a2 where C is a
; <16 x float> constant with the same value (0x3FB99999A0000000) in every lane.
; The expected vfmadd231ps reads the constant as a RIP-relative {1to16}
; broadcast memory operand, followed by vmovaps %zmm1, %zmm0.
; NOTE(review): no ret or closing brace is visible for this function in this view - confirm.
97 define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
98 ; CHECK-LABEL: test231_br:
100 ; CHECK-NEXT: vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
101 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
; %b1 = a1 * C
103 %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
; %b2 = %b1 + a2
104 %b2 = fadd <16 x float> %b1, %a2
; Broadcast test, constant as addend: computes a1*a2 + C where C is a
; <16 x float> constant with the same value (0x3FB99999A0000000) in every lane.
; The expected vfmadd213ps reads the constant as a RIP-relative {1to16}
; broadcast memory operand and writes zmm0.
; NOTE(review): no ret or closing brace is visible for this function in this view - confirm.
108 define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
109 ; CHECK-LABEL: test213_br:
111 ; CHECK-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
; %b1 = a1 * a2
113 %b1 = fmul <16 x float> %a1, %a2
; %b2 = %b1 + C
114 %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
; Masked FMA test, 132 form: computes a0*load(a2_ptrt) + a1 and selects it per
; lane under %mask, falling back to %a0 (one of the multiplicands) where the
; mask bit is clear. The expected output is a vfmadd132ps with a (%rdi) memory
; operand, masked by {%k1}, writing zmm0.
; Two expectation sets are given: the default prefix builds %k1 with
; vpmovsxbd / vpandd / vptestmd, while the SKX prefix uses a single vpmovb2m.
; NOTE(review): the closing brace is not visible for this function in this view - confirm.
119 define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
120 ; CHECK-LABEL: test_x86_fmadd132_ps:
122 ; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
123 ; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
124 ; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
125 ; CHECK-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
128 ; SKX-LABEL: test_x86_fmadd132_ps:
130 ; SKX-NEXT: vpmovb2m %xmm2, %k1
131 ; SKX-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; %a2 = vector read through the pointer argument; the load is marked align 1.
133 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
; %x = a0 * %a2
134 %x = fmul <16 x float> %a0, %a2
; %y = %x + a1
135 %y = fadd <16 x float> %x, %a1
; Lanes with a clear mask bit keep %a0.
136 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
137 ret <16 x float> %res
; Masked FMA test, 231 form: computes a0*load(a2_ptrt) + a1 and selects it per
; lane under %mask, falling back to %a1 (the addend) where the mask bit is
; clear. The expected output is a vfmadd231ps with a (%rdi) memory operand,
; masked by {%k1}, writing zmm1, followed by vmovaps %zmm1, %zmm0.
; Two expectation sets are given: the default prefix builds %k1 with
; vpmovsxbd / vpandd / vptestmd, while the SKX prefix uses a single vpmovb2m.
; NOTE(review): the closing brace is not visible for this function in this view - confirm.
141 define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
142 ; CHECK-LABEL: test_x86_fmadd231_ps:
144 ; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
145 ; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
146 ; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
147 ; CHECK-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
148 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
151 ; SKX-LABEL: test_x86_fmadd231_ps:
153 ; SKX-NEXT: vpmovb2m %xmm2, %k1
154 ; SKX-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
155 ; SKX-NEXT: vmovaps %zmm1, %zmm0
; %a2 = vector read through the pointer argument; the load is marked align 1.
157 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
; %x = a0 * %a2
158 %x = fmul <16 x float> %a0, %a2
; %y = %x + a1
159 %y = fadd <16 x float> %x, %a1
; Lanes with a clear mask bit keep %a1 (differs from the 132 test, which keeps %a0).
160 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
161 ret <16 x float> %res
; Masked FMA test, 213 form: computes a1*a0 + load(a2_ptrt) and selects it per
; lane under %mask, falling back to %a1 (a multiplicand) where the mask bit is
; clear. Here the loaded vector is the addend rather than a multiplicand.
; The expected output is a vfmadd213ps with a (%rdi) memory operand, masked by
; {%k1}, writing zmm1, followed by vmovaps %zmm1, %zmm0.
; Two expectation sets are given: the default prefix builds %k1 with
; vpmovsxbd / vpandd / vptestmd, while the SKX prefix uses a single vpmovb2m.
; NOTE(review): the closing brace is not visible for this function in this view - confirm.
165 define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
166 ; CHECK-LABEL: test_x86_fmadd213_ps:
168 ; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
169 ; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
170 ; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
171 ; CHECK-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
172 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
175 ; SKX-LABEL: test_x86_fmadd213_ps:
177 ; SKX-NEXT: vpmovb2m %xmm2, %k1
178 ; SKX-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
179 ; SKX-NEXT: vmovaps %zmm1, %zmm0
; %a2 = vector read through the pointer argument; the load is marked align 1.
181 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
; %x = a1 * a0 (both multiplicands are register arguments)
182 %x = fmul <16 x float> %a1, %a0
; %y = %x + %a2 (the loaded vector is the addend)
183 %y = fadd <16 x float> %x, %a2
; Lanes with a clear mask bit keep %a1.
184 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
185 ret <16 x float> %res