; Test driver: llc is invoked three times for the x86_64-pc-win32 triple
; (core-avx2 CPU; +fma,+fma4 attributes; bdver2 CPU with fma4 disabled) and
; every invocation is verified by FileCheck against the same default prefix.
1 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
2 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma,+fma4 | FileCheck %s
3 ; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
; Attribute group #0 (nounwind) is attached to the test function definitions.
5 attributes #0 = { nounwind }
; Tests for @llvm.x86.fma.vfmadd.ps on <4 x float>.
; Each function passes %a and %b to the intrinsic in a different operand order.
; The expected assembly is one vmovaps load into %xmm0 followed by a single
; vfmadd*ps that reads its remaining operand from memory; the expected form
; (132, 231 or 213) varies with the operand order.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
7 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfmadd132ps with (%rdx).
8 define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
9 ; CHECK-LABEL: test_x86_fmadd_baa_ps:
11 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
12 ; CHECK-NEXT: vfmadd132ps (%rdx), %xmm0, %xmm0
14 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Operand order (%a, %b, %a): load from (%rcx), then vfmadd231ps with (%rdx).
18 define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
19 ; CHECK-LABEL: test_x86_fmadd_aba_ps:
21 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
22 ; CHECK-NEXT: vfmadd231ps (%rdx), %xmm0, %xmm0
24 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Operand order (%b, %b, %a): load from (%rdx), then vfmadd213ps with (%rcx).
28 define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
29 ; CHECK-LABEL: test_x86_fmadd_bba_ps:
31 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
32 ; CHECK-NEXT: vfmadd213ps (%rcx), %xmm0, %xmm0
34 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Tests for @llvm.x86.fma.vfmadd.ps.256 on <8 x float>.
; Same three operand orders as the 128-bit float tests, but the expected
; vmovaps load and the vfmadd*ps instruction use %ymm0 instead of %xmm0.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
38 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfmadd132ps with (%rdx).
39 define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
40 ; CHECK-LABEL: test_x86_fmadd_baa_ps_y:
42 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
43 ; CHECK-NEXT: vfmadd132ps (%rdx), %ymm0, %ymm0
45 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; Operand order (%a, %b, %a): load from (%rcx), then vfmadd231ps with (%rdx).
49 define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
50 ; CHECK-LABEL: test_x86_fmadd_aba_ps_y:
52 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
53 ; CHECK-NEXT: vfmadd231ps (%rdx), %ymm0, %ymm0
55 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; Operand order (%b, %b, %a): load from (%rdx), then vfmadd213ps with (%rcx).
59 define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
60 ; CHECK-LABEL: test_x86_fmadd_bba_ps_y:
62 ; CHECK-NEXT: vmovaps (%rdx), %ymm0
63 ; CHECK-NEXT: vfmadd213ps (%rcx), %ymm0, %ymm0
65 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Tests for @llvm.x86.fma.vfmadd.pd on <2 x double>.
; Each function passes %a and %b to the intrinsic in a different operand order.
; The expected assembly is one vmovapd load into %xmm0 followed by a single
; vfmadd*pd that reads its remaining operand from memory; the expected form
; (132, 231 or 213) varies with the operand order.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
69 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfmadd132pd with (%rdx).
70 define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
71 ; CHECK-LABEL: test_x86_fmadd_baa_pd:
73 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
74 ; CHECK-NEXT: vfmadd132pd (%rdx), %xmm0, %xmm0
76 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
; Operand order (%a, %b, %a): load from (%rcx), then vfmadd231pd with (%rdx).
80 define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
81 ; CHECK-LABEL: test_x86_fmadd_aba_pd:
83 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
84 ; CHECK-NEXT: vfmadd231pd (%rdx), %xmm0, %xmm0
86 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
; Operand order (%b, %b, %a): load from (%rdx), then vfmadd213pd with (%rcx).
90 define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
91 ; CHECK-LABEL: test_x86_fmadd_bba_pd:
93 ; CHECK-NEXT: vmovapd (%rdx), %xmm0
94 ; CHECK-NEXT: vfmadd213pd (%rcx), %xmm0, %xmm0
96 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
; Tests for @llvm.x86.fma.vfmadd.pd.256 on <4 x double>.
; Same three operand orders as the 128-bit double tests, but the expected
; vmovapd load and the vfmadd*pd instruction use %ymm0 instead of %xmm0.
; Each function returns the intrinsic result unchanged.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
100 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfmadd132pd with (%rdx).
101 define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
102 ; CHECK-LABEL: test_x86_fmadd_baa_pd_y:
104 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
105 ; CHECK-NEXT: vfmadd132pd (%rdx), %ymm0, %ymm0
107 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
108 ret <4 x double> %res
; Operand order (%a, %b, %a): load from (%rcx), then vfmadd231pd with (%rdx).
111 define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
112 ; CHECK-LABEL: test_x86_fmadd_aba_pd_y:
114 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
115 ; CHECK-NEXT: vfmadd231pd (%rdx), %ymm0, %ymm0
117 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
118 ret <4 x double> %res
; Operand order (%b, %b, %a): load from (%rdx), then vfmadd213pd with (%rcx).
121 define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
122 ; CHECK-LABEL: test_x86_fmadd_bba_pd_y:
124 ; CHECK-NEXT: vmovapd (%rdx), %ymm0
125 ; CHECK-NEXT: vfmadd213pd (%rcx), %ymm0, %ymm0
127 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
128 ret <4 x double> %res
; Tests for @llvm.x86.fma.vfnmadd.ps on <4 x float>.
; Each function passes %a and %b to the intrinsic in a different operand order.
; The expected assembly is one vmovaps load into %xmm0 followed by a single
; vfnmadd*ps that reads its remaining operand from memory; the expected form
; (132, 231 or 213) varies with the operand order.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
133 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfnmadd132ps with (%rdx).
134 define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
135 ; CHECK-LABEL: test_x86_fnmadd_baa_ps:
137 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
138 ; CHECK-NEXT: vfnmadd132ps (%rdx), %xmm0, %xmm0
140 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Operand order (%a, %b, %a): load from (%rcx), then vfnmadd231ps with (%rdx).
144 define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
145 ; CHECK-LABEL: test_x86_fnmadd_aba_ps:
147 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
148 ; CHECK-NEXT: vfnmadd231ps (%rdx), %xmm0, %xmm0
150 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Operand order (%b, %b, %a): load from (%rdx), then vfnmadd213ps with (%rcx).
154 define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
155 ; CHECK-LABEL: test_x86_fnmadd_bba_ps:
157 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
158 ; CHECK-NEXT: vfnmadd213ps (%rcx), %xmm0, %xmm0
160 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Tests for @llvm.x86.fma.vfnmadd.ps.256 on <8 x float>.
; Same three operand orders as the 128-bit float tests, but the expected
; vmovaps load and the vfnmadd*ps instruction use %ymm0 instead of %xmm0.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
164 declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfnmadd132ps with (%rdx).
165 define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
166 ; CHECK-LABEL: test_x86_fnmadd_baa_ps_y:
168 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
169 ; CHECK-NEXT: vfnmadd132ps (%rdx), %ymm0, %ymm0
171 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; Operand order (%a, %b, %a): load from (%rcx), then vfnmadd231ps with (%rdx).
175 define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
176 ; CHECK-LABEL: test_x86_fnmadd_aba_ps_y:
178 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
179 ; CHECK-NEXT: vfnmadd231ps (%rdx), %ymm0, %ymm0
181 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; Operand order (%b, %b, %a): load from (%rdx), then vfnmadd213ps with (%rcx).
185 define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
186 ; CHECK-LABEL: test_x86_fnmadd_bba_ps_y:
188 ; CHECK-NEXT: vmovaps (%rdx), %ymm0
189 ; CHECK-NEXT: vfnmadd213ps (%rcx), %ymm0, %ymm0
191 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Tests for @llvm.x86.fma.vfnmadd.pd on <2 x double>.
; Each function passes %a and %b to the intrinsic in a different operand order
; and returns the intrinsic result unchanged. The expected assembly is one
; vmovapd load into %xmm0 followed by a single vfnmadd*pd that reads its
; remaining operand from memory; the expected form (132, 231 or 213) varies
; with the operand order.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
195 declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfnmadd132pd with (%rdx).
196 define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
197 ; CHECK-LABEL: test_x86_fnmadd_baa_pd:
199 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
200 ; CHECK-NEXT: vfnmadd132pd (%rdx), %xmm0, %xmm0
202 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
203 ret <2 x double> %res
; Operand order (%a, %b, %a): load from (%rcx), then vfnmadd231pd with (%rdx).
206 define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
207 ; CHECK-LABEL: test_x86_fnmadd_aba_pd:
209 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
210 ; CHECK-NEXT: vfnmadd231pd (%rdx), %xmm0, %xmm0
212 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
213 ret <2 x double> %res
; Operand order (%b, %b, %a): load from (%rdx), then vfnmadd213pd with (%rcx).
216 define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
217 ; CHECK-LABEL: test_x86_fnmadd_bba_pd:
219 ; CHECK-NEXT: vmovapd (%rdx), %xmm0
220 ; CHECK-NEXT: vfnmadd213pd (%rcx), %xmm0, %xmm0
222 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
223 ret <2 x double> %res
; Tests for @llvm.x86.fma.vfnmadd.pd.256 on <4 x double>.
; Same three operand orders as the 128-bit double tests, but the expected
; vmovapd load and the vfnmadd*pd instruction use %ymm0 instead of %xmm0.
; Each function returns the intrinsic result unchanged.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
226 declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfnmadd132pd with (%rdx).
227 define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
228 ; CHECK-LABEL: test_x86_fnmadd_baa_pd_y:
230 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
231 ; CHECK-NEXT: vfnmadd132pd (%rdx), %ymm0, %ymm0
233 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
234 ret <4 x double> %res
; Operand order (%a, %b, %a): load from (%rcx), then vfnmadd231pd with (%rdx).
237 define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
238 ; CHECK-LABEL: test_x86_fnmadd_aba_pd_y:
240 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
241 ; CHECK-NEXT: vfnmadd231pd (%rdx), %ymm0, %ymm0
243 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
244 ret <4 x double> %res
; Operand order (%b, %b, %a): load from (%rdx), then vfnmadd213pd with (%rcx).
247 define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
248 ; CHECK-LABEL: test_x86_fnmadd_bba_pd_y:
250 ; CHECK-NEXT: vmovapd (%rdx), %ymm0
251 ; CHECK-NEXT: vfnmadd213pd (%rcx), %ymm0, %ymm0
253 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
254 ret <4 x double> %res
; Tests for @llvm.x86.fma.vfmsub.ps on <4 x float>.
; Each function passes %a and %b to the intrinsic in a different operand order.
; The expected assembly is one vmovaps load into %xmm0 followed by a single
; vfmsub*ps that reads its remaining operand from memory; the expected form
; (132, 231 or 213) varies with the operand order.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
258 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfmsub132ps with (%rdx).
259 define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
260 ; CHECK-LABEL: test_x86_fmsub_baa_ps:
262 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
263 ; CHECK-NEXT: vfmsub132ps (%rdx), %xmm0, %xmm0
265 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Operand order (%a, %b, %a): load from (%rcx), then vfmsub231ps with (%rdx).
269 define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
270 ; CHECK-LABEL: test_x86_fmsub_aba_ps:
272 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
273 ; CHECK-NEXT: vfmsub231ps (%rdx), %xmm0, %xmm0
275 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Operand order (%b, %b, %a): load from (%rdx), then vfmsub213ps with (%rcx).
279 define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
280 ; CHECK-LABEL: test_x86_fmsub_bba_ps:
282 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
283 ; CHECK-NEXT: vfmsub213ps (%rcx), %xmm0, %xmm0
285 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Tests for @llvm.x86.fma.vfmsub.ps.256 on <8 x float>.
; Same three operand orders as the 128-bit float tests, but the expected
; vmovaps load and the vfmsub*ps instruction use %ymm0 instead of %xmm0.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
289 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfmsub132ps with (%rdx).
290 define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
291 ; CHECK-LABEL: test_x86_fmsub_baa_ps_y:
293 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
294 ; CHECK-NEXT: vfmsub132ps (%rdx), %ymm0, %ymm0
296 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; Operand order (%a, %b, %a): load from (%rcx), then vfmsub231ps with (%rdx).
300 define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
301 ; CHECK-LABEL: test_x86_fmsub_aba_ps_y:
303 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
304 ; CHECK-NEXT: vfmsub231ps (%rdx), %ymm0, %ymm0
306 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; Operand order (%b, %b, %a): load from (%rdx), then vfmsub213ps with (%rcx).
310 define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
311 ; CHECK-LABEL: test_x86_fmsub_bba_ps_y:
313 ; CHECK-NEXT: vmovaps (%rdx), %ymm0
314 ; CHECK-NEXT: vfmsub213ps (%rcx), %ymm0, %ymm0
316 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Tests for @llvm.x86.fma.vfmsub.pd on <2 x double>.
; Each function passes %a and %b to the intrinsic in a different operand order
; and returns the intrinsic result unchanged. The expected assembly is one
; vmovapd load into %xmm0 followed by a single vfmsub*pd that reads its
; remaining operand from memory; the expected form (132, 231 or 213) varies
; with the operand order.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
320 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfmsub132pd with (%rdx).
321 define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
322 ; CHECK-LABEL: test_x86_fmsub_baa_pd:
324 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
325 ; CHECK-NEXT: vfmsub132pd (%rdx), %xmm0, %xmm0
327 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
328 ret <2 x double> %res
; Operand order (%a, %b, %a): load from (%rcx), then vfmsub231pd with (%rdx).
331 define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
332 ; CHECK-LABEL: test_x86_fmsub_aba_pd:
334 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
335 ; CHECK-NEXT: vfmsub231pd (%rdx), %xmm0, %xmm0
337 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
338 ret <2 x double> %res
; Operand order (%b, %b, %a): load from (%rdx), then vfmsub213pd with (%rcx).
341 define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
342 ; CHECK-LABEL: test_x86_fmsub_bba_pd:
344 ; CHECK-NEXT: vmovapd (%rdx), %xmm0
345 ; CHECK-NEXT: vfmsub213pd (%rcx), %xmm0, %xmm0
347 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
348 ret <2 x double> %res
; Tests for @llvm.x86.fma.vfmsub.pd.256 on <4 x double>.
; Same three operand orders as the 128-bit double tests, but the expected
; vmovapd load and the vfmsub*pd instruction use %ymm0 instead of %xmm0.
; Each function returns the intrinsic result unchanged.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
351 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfmsub132pd with (%rdx).
352 define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
353 ; CHECK-LABEL: test_x86_fmsub_baa_pd_y:
355 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
356 ; CHECK-NEXT: vfmsub132pd (%rdx), %ymm0, %ymm0
358 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
359 ret <4 x double> %res
; Operand order (%a, %b, %a): load from (%rcx), then vfmsub231pd with (%rdx).
362 define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
363 ; CHECK-LABEL: test_x86_fmsub_aba_pd_y:
365 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
366 ; CHECK-NEXT: vfmsub231pd (%rdx), %ymm0, %ymm0
368 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
369 ret <4 x double> %res
; Operand order (%b, %b, %a): load from (%rdx), then vfmsub213pd with (%rcx).
372 define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
373 ; CHECK-LABEL: test_x86_fmsub_bba_pd_y:
375 ; CHECK-NEXT: vmovapd (%rdx), %ymm0
376 ; CHECK-NEXT: vfmsub213pd (%rcx), %ymm0, %ymm0
378 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
379 ret <4 x double> %res
; Tests for @llvm.x86.fma.vfnmsub.ps on <4 x float>.
; Each function passes %a and %b to the intrinsic in a different operand order.
; The expected assembly is one vmovaps load into %xmm0 followed by a single
; vfnmsub*ps that reads its remaining operand from memory; the expected form
; (132, 231 or 213) varies with the operand order.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
383 declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfnmsub132ps with (%rdx).
384 define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
385 ; CHECK-LABEL: test_x86_fnmsub_baa_ps:
387 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
388 ; CHECK-NEXT: vfnmsub132ps (%rdx), %xmm0, %xmm0
390 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
; Operand order (%a, %b, %a): load from (%rcx), then vfnmsub231ps with (%rdx).
394 define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
395 ; CHECK-LABEL: test_x86_fnmsub_aba_ps:
397 ; CHECK-NEXT: vmovaps (%rcx), %xmm0
398 ; CHECK-NEXT: vfnmsub231ps (%rdx), %xmm0, %xmm0
400 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
; Operand order (%b, %b, %a): load from (%rdx), then vfnmsub213ps with (%rcx).
404 define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
405 ; CHECK-LABEL: test_x86_fnmsub_bba_ps:
407 ; CHECK-NEXT: vmovaps (%rdx), %xmm0
408 ; CHECK-NEXT: vfnmsub213ps (%rcx), %xmm0, %xmm0
410 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
; Tests for @llvm.x86.fma.vfnmsub.ps.256 on <8 x float>.
; Same three operand orders as the 128-bit float tests, but the expected
; vmovaps load and the vfnmsub*ps instruction use %ymm0 instead of %xmm0.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
414 declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfnmsub132ps with (%rdx).
415 define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
416 ; CHECK-LABEL: test_x86_fnmsub_baa_ps_y:
418 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
419 ; CHECK-NEXT: vfnmsub132ps (%rdx), %ymm0, %ymm0
421 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
; Operand order (%a, %b, %a): load from (%rcx), then vfnmsub231ps with (%rdx).
425 define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
426 ; CHECK-LABEL: test_x86_fnmsub_aba_ps_y:
428 ; CHECK-NEXT: vmovaps (%rcx), %ymm0
429 ; CHECK-NEXT: vfnmsub231ps (%rdx), %ymm0, %ymm0
431 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
; Operand order (%b, %b, %a): load from (%rdx), then vfnmsub213ps with (%rcx).
435 define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
436 ; CHECK-LABEL: test_x86_fnmsub_bba_ps_y:
438 ; CHECK-NEXT: vmovaps (%rdx), %ymm0
439 ; CHECK-NEXT: vfnmsub213ps (%rcx), %ymm0, %ymm0
441 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
; Tests for @llvm.x86.fma.vfnmsub.pd on <2 x double>.
; Each function passes %a and %b to the intrinsic in a different operand order
; and returns the intrinsic result unchanged. The expected assembly is one
; vmovapd load into %xmm0 followed by a single vfnmsub*pd that reads its
; remaining operand from memory; the expected form (132, 231 or 213) varies
; with the operand order.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
445 declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfnmsub132pd with (%rdx).
446 define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
447 ; CHECK-LABEL: test_x86_fnmsub_baa_pd:
449 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
450 ; CHECK-NEXT: vfnmsub132pd (%rdx), %xmm0, %xmm0
452 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
453 ret <2 x double> %res
; Operand order (%a, %b, %a): load from (%rcx), then vfnmsub231pd with (%rdx).
456 define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
457 ; CHECK-LABEL: test_x86_fnmsub_aba_pd:
459 ; CHECK-NEXT: vmovapd (%rcx), %xmm0
460 ; CHECK-NEXT: vfnmsub231pd (%rdx), %xmm0, %xmm0
462 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
463 ret <2 x double> %res
; Operand order (%b, %b, %a): load from (%rdx), then vfnmsub213pd with (%rcx).
466 define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
467 ; CHECK-LABEL: test_x86_fnmsub_bba_pd:
469 ; CHECK-NEXT: vmovapd (%rdx), %xmm0
470 ; CHECK-NEXT: vfnmsub213pd (%rcx), %xmm0, %xmm0
472 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
473 ret <2 x double> %res
; Tests for @llvm.x86.fma.vfnmsub.pd.256 on <4 x double>.
; Same three operand orders as the 128-bit double tests, but the expected
; vmovapd load and the vfnmsub*pd instruction use %ymm0 instead of %xmm0.
; Each function returns the intrinsic result unchanged.
; NOTE(review): the expected output addresses %a through %rcx and %b through
; %rdx, presumably because of the win32 triple on the llc command lines -- confirm.
476 declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; Operand order (%b, %a, %a): load from (%rcx), then vfnmsub132pd with (%rdx).
477 define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
478 ; CHECK-LABEL: test_x86_fnmsub_baa_pd_y:
480 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
481 ; CHECK-NEXT: vfnmsub132pd (%rdx), %ymm0, %ymm0
483 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
484 ret <4 x double> %res
; Operand order (%a, %b, %a): load from (%rcx), then vfnmsub231pd with (%rdx).
487 define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
488 ; CHECK-LABEL: test_x86_fnmsub_aba_pd_y:
490 ; CHECK-NEXT: vmovapd (%rcx), %ymm0
491 ; CHECK-NEXT: vfnmsub231pd (%rdx), %ymm0, %ymm0
493 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
494 ret <4 x double> %res
; Operand order (%b, %b, %a): load from (%rdx), then vfnmsub213pd with (%rcx).
497 define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
498 ; CHECK-LABEL: test_x86_fnmsub_bba_pd_y:
500 ; CHECK-NEXT: vmovapd (%rdx), %ymm0
501 ; CHECK-NEXT: vfnmsub213pd (%rcx), %ymm0, %ymm0
503 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
504 ret <4 x double> %res