; Test configuration: llc is invoked three times on this file for the
; x86_64-unknown-unknown triple, always with -fp-contract=fast.
;   * +avx,+fma        -> output verified with the default CHECK prefix
;   * +avx,+fma4,+fma  -> output verified with the default CHECK prefix as well,
;                         i.e. it is expected to match the +fma-only output
;   * +avx,+fma4       -> output verified with the CHECK_FMA4 prefix
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4
; (a0 * a1) + a2 on <4 x float>: the separate fmul/fadd pair is expected to be
; emitted as one fused multiply-add on xmm registers (vfmadd213ps under the
; default prefix, four-operand vfmaddps under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the instruction checks directly follow the label checks --
; confirm against the full file.
5 define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
6 ; CHECK-LABEL: test_x86_fmadd_ps:
8 ; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
11 ; CHECK_FMA4-LABEL: test_x86_fmadd_ps:
13 ; CHECK_FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
14 ; CHECK_FMA4-NEXT: retq
15 %x = fmul <4 x float> %a0, %a1
16 %res = fadd <4 x float> %x, %a2
; (a0 * a1) - a2 on <4 x float>: the fmul/fsub pair is expected to be emitted
; as one fused multiply-subtract on xmm registers (vfmsub213ps under the
; default prefix, four-operand vfmsubps under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the instruction checks directly follow the label checks --
; confirm against the full file.
20 define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
21 ; CHECK-LABEL: test_x86_fmsub_ps:
23 ; CHECK-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
26 ; CHECK_FMA4-LABEL: test_x86_fmsub_ps:
28 ; CHECK_FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
29 ; CHECK_FMA4-NEXT: retq
30 %x = fmul <4 x float> %a0, %a1
31 %res = fsub <4 x float> %x, %a2
; a2 - (a0 * a1) on <4 x float>: the product is the subtrahend, so the expected
; instruction is the negated-multiply-add form (vfnmadd213ps under the default
; prefix, four-operand vfnmaddps under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the instruction checks directly follow the label checks --
; confirm against the full file.
35 define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
36 ; CHECK-LABEL: test_x86_fnmadd_ps:
38 ; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
41 ; CHECK_FMA4-LABEL: test_x86_fnmadd_ps:
43 ; CHECK_FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
44 ; CHECK_FMA4-NEXT: retq
45 %x = fmul <4 x float> %a0, %a1
46 %res = fsub <4 x float> %a2, %x
; -(a0 * a1) - a2 on <4 x float>: the expected instruction is the
; negated-multiply-subtract form (vfnmsub213ps under the default prefix,
; four-operand vfnmsubps under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the instruction checks directly follow the label checks --
; confirm against the full file.
50 define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
51 ; CHECK-LABEL: test_x86_fnmsub_ps:
53 ; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
56 ; CHECK_FMA4-LABEL: test_x86_fnmsub_ps:
58 ; CHECK_FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
59 ; CHECK_FMA4-NEXT: retq
60 %x = fmul <4 x float> %a0, %a1
; Subtracting the product from a vector of -0.0 negates it: %y = -(a0 * a1).
61 %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
62 %res = fsub <4 x float> %y, %a2
; (a0 * a1) + a2 on <8 x float>: same pattern as the 4-wide fmadd case but the
; expected fused multiply-add operates on ymm registers (vfmadd213ps under the
; default prefix, four-operand vfmaddps under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the instruction checks directly follow the label checks --
; confirm against the full file.
66 define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
67 ; CHECK-LABEL: test_x86_fmadd_ps_y:
69 ; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
72 ; CHECK_FMA4-LABEL: test_x86_fmadd_ps_y:
74 ; CHECK_FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
75 ; CHECK_FMA4-NEXT: retq
76 %x = fmul <8 x float> %a0, %a1
77 %res = fadd <8 x float> %x, %a2
; (a0 * a1) - a2 on <8 x float>: the fmul/fsub pair is expected to be emitted
; as one fused multiply-subtract on ymm registers (vfmsub213ps under the
; default prefix, four-operand vfmsubps under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the instruction checks directly follow the label checks --
; confirm against the full file.
81 define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
82 ; CHECK-LABEL: test_x86_fmsub_ps_y:
84 ; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
87 ; CHECK_FMA4-LABEL: test_x86_fmsub_ps_y:
89 ; CHECK_FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
90 ; CHECK_FMA4-NEXT: retq
91 %x = fmul <8 x float> %a0, %a1
92 %res = fsub <8 x float> %x, %a2
; a2 - (a0 * a1) on <8 x float>: the product is the subtrahend, so the expected
; instruction is the negated-multiply-add form on ymm registers (vfnmadd213ps
; under the default prefix, four-operand vfnmaddps under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the default-prefix instruction check directly follows its
; label check -- confirm against the full file.
96 define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
97 ; CHECK-LABEL: test_x86_fnmadd_ps_y:
99 ; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
102 ; CHECK_FMA4-LABEL: test_x86_fnmadd_ps_y:
103 ; CHECK_FMA4: # BB#0:
104 ; CHECK_FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
105 ; CHECK_FMA4-NEXT: retq
106 %x = fmul <8 x float> %a0, %a1
107 %res = fsub <8 x float> %a2, %x
; -(a0 * a1) - a2 on <8 x float>: the expected instruction is the
; negated-multiply-subtract form on ymm registers (vfnmsub213ps under the
; default prefix, four-operand vfnmsubps under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the default-prefix instruction check directly follows its
; label check -- confirm against the full file.
111 define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
112 ; CHECK-LABEL: test_x86_fnmsub_ps_y:
114 ; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
117 ; CHECK_FMA4-LABEL: test_x86_fnmsub_ps_y:
118 ; CHECK_FMA4: # BB#0:
119 ; CHECK_FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
120 ; CHECK_FMA4-NEXT: retq
121 %x = fmul <8 x float> %a0, %a1
; Subtracting the product from a vector of -0.0 negates it: %y = -(a0 * a1).
122 %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
123 %res = fsub <8 x float> %y, %a2
; (a0 * a1) + a2 on <4 x double>: the fmul/fadd pair is expected to be emitted
; as one packed-double fused multiply-add on ymm registers (vfmadd213pd under
; the default prefix, four-operand vfmaddpd under the CHECK_FMA4 prefix).
; NOTE(review): no closing brace is visible for this function in this view, and
; the default-prefix instruction check directly follows its label check --
; confirm against the full file.
127 define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
128 ; CHECK-LABEL: test_x86_fmadd_pd_y:
130 ; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
133 ; CHECK_FMA4-LABEL: test_x86_fmadd_pd_y:
134 ; CHECK_FMA4: # BB#0:
135 ; CHECK_FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
136 ; CHECK_FMA4-NEXT: retq
137 %x = fmul <4 x double> %a0, %a1
138 %res = fadd <4 x double> %x, %a2
139 ret <4 x double> %res
; (a0 * a1) - a2 on <4 x double>: the fmul/fsub pair is expected to be emitted
; as one packed-double fused multiply-subtract on ymm registers (vfmsub213pd
; under the default prefix, four-operand vfmsubpd under the CHECK_FMA4 prefix).
; NOTE(review): no closing brace is visible for this function in this view, and
; the default-prefix instruction check directly follows its label check --
; confirm against the full file.
142 define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
143 ; CHECK-LABEL: test_x86_fmsub_pd_y:
145 ; CHECK-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
148 ; CHECK_FMA4-LABEL: test_x86_fmsub_pd_y:
149 ; CHECK_FMA4: # BB#0:
150 ; CHECK_FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
151 ; CHECK_FMA4-NEXT: retq
152 %x = fmul <4 x double> %a0, %a1
153 %res = fsub <4 x double> %x, %a2
154 ret <4 x double> %res
; (a0 * a1) - a2 on <2 x double>: the fmul/fsub pair is expected to be emitted
; as one packed-double fused multiply-subtract on xmm registers (vfmsub213pd
; under the default prefix, four-operand vfmsubpd under the CHECK_FMA4 prefix).
; NOTE(review): no closing brace is visible for this function in this view, and
; the default-prefix instruction check directly follows its label check --
; confirm against the full file.
157 define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
158 ; CHECK-LABEL: test_x86_fmsub_pd:
160 ; CHECK-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
163 ; CHECK_FMA4-LABEL: test_x86_fmsub_pd:
164 ; CHECK_FMA4: # BB#0:
165 ; CHECK_FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
166 ; CHECK_FMA4-NEXT: retq
167 %x = fmul <2 x double> %a0, %a1
168 %res = fsub <2 x double> %x, %a2
169 ret <2 x double> %res
; Scalar float a2 - (a0 * a1): the product is the subtrahend, so the expected
; instruction is the scalar negated-multiply-add form (vfnmadd213ss under the
; default prefix, four-operand vfnmaddss under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the default-prefix instruction check directly follows its
; label check -- confirm against the full file.
172 define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) {
173 ; CHECK-LABEL: test_x86_fnmadd_ss:
175 ; CHECK-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
178 ; CHECK_FMA4-LABEL: test_x86_fnmadd_ss:
179 ; CHECK_FMA4: # BB#0:
180 ; CHECK_FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
181 ; CHECK_FMA4-NEXT: retq
182 %x = fmul float %a0, %a1
183 %res = fsub float %a2, %x
; Scalar double a2 - (a0 * a1): the product is the subtrahend, so the expected
; instruction is the scalar negated-multiply-add form (vfnmadd213sd under the
; default prefix, four-operand vfnmaddsd under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the default-prefix instruction check directly follows its
; label check -- confirm against the full file.
187 define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) {
188 ; CHECK-LABEL: test_x86_fnmadd_sd:
190 ; CHECK-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
193 ; CHECK_FMA4-LABEL: test_x86_fnmadd_sd:
194 ; CHECK_FMA4: # BB#0:
195 ; CHECK_FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
196 ; CHECK_FMA4-NEXT: retq
197 %x = fmul double %a0, %a1
198 %res = fsub double %a2, %x
; Scalar double (a0 * a1) - a2: the fmul/fsub pair is expected to be emitted as
; one scalar fused multiply-subtract (vfmsub213sd under the default prefix,
; four-operand vfmsubsd under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the default-prefix instruction check directly follows its
; label check -- confirm against the full file.
202 define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) {
203 ; CHECK-LABEL: test_x86_fmsub_sd:
205 ; CHECK-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
208 ; CHECK_FMA4-LABEL: test_x86_fmsub_sd:
209 ; CHECK_FMA4: # BB#0:
210 ; CHECK_FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
211 ; CHECK_FMA4-NEXT: retq
212 %x = fmul double %a0, %a1
213 %res = fsub double %x, %a2
; Scalar float (-a0 * a1) - a2: unlike the vector fnmsub tests in this file,
; the negation is applied to the multiplicand %a0 before the fmul rather than
; to the product. The expected instruction is still the scalar
; negated-multiply-subtract form (vfnmsub213ss under the default prefix,
; four-operand vfnmsubss under the CHECK_FMA4 prefix).
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the default-prefix instruction check directly follows its
; label check -- confirm against the full file.
217 define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) {
218 ; CHECK-LABEL: test_x86_fnmsub_ss:
220 ; CHECK-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
223 ; CHECK_FMA4-LABEL: test_x86_fnmsub_ss:
224 ; CHECK_FMA4: # BB#0:
225 ; CHECK_FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
226 ; CHECK_FMA4-NEXT: retq
; -0.0 - a0 negates the first multiplicand: %x = -a0.
227 %x = fsub float -0.000000e+00, %a0
228 %y = fmul float %x, %a1
229 %res = fsub float %y, %a2
; (load(a0) * a1) + a2 on <4 x float>, where the first multiplicand is loaded
; through the pointer argument %a0. Under the default prefix the expected code
; first loads the vector into a register with vmovaps and then issues a
; register-only vfmadd213ps; under the CHECK_FMA4 prefix the load is expected
; to be folded into vfmaddps as the (%rdi) memory operand.
; NOTE(review): no terminator or closing brace is visible for this function in
; this view, and the default-prefix instruction checks directly follow the
; label check -- confirm against the full file.
233 define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
234 ; CHECK-LABEL: test_x86_fmadd_ps_load:
236 ; CHECK-NEXT: vmovaps (%rdi), %xmm2
237 ; CHECK-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0
240 ; CHECK_FMA4-LABEL: test_x86_fmadd_ps_load:
241 ; CHECK_FMA4: # BB#0:
242 ; CHECK_FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
243 ; CHECK_FMA4-NEXT: retq
244 %x = load <4 x float>, <4 x float>* %a0
245 %y = fmul <4 x float> %x, %a1
246 %res = fadd <4 x float> %y, %a2
250 define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
251 ; CHECK-LABEL: test_x86_fmsub_ps_load:
253 ; CHECK-NEXT: vmovaps (%rdi), %xmm2
254 ; CHECK-NEXT: vfmsub213ps %xmm1, %xmm2, %xmm0
257 ; CHECK_FMA4-LABEL: test_x86_fmsub_ps_load:
258 ; CHECK_FMA4: # BB#0:
259 ; CHECK_FMA4-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
260 ; CHECK_FMA4-NEXT: retq
261 %x = load <4 x float>, <4 x float>* %a0
262 %y = fmul <4 x float> %x, %a1
263 %res = fsub <4 x float> %y, %a2