1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
3 ; RUN: llc < %s -mtriple=x86_64-pc-windows -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
9 define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
10 ; CHECK-LABEL: test_x86_fma_vfmadd_ss:
13 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
14 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
15 ; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0
17 ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
19 ; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
22 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
26 define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
27 ; CHECK-LABEL: test_x86_fma_vfmadd_bac_ss:
30 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
31 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
32 ; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0
34 ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1
35 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
37 ; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0
39 %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
42 declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
44 define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
45 ; CHECK-LABEL: test_x86_fma_vfmadd_sd:
48 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
49 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
50 ; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0
52 ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
54 ; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
57 %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
61 define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
62 ; CHECK-LABEL: test_x86_fma_vfmadd_bac_sd:
65 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
66 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
67 ; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0
69 ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1
70 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
72 ; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0
75 %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
78 declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
80 define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
81 ; CHECK-LABEL: test_x86_fma_vfmadd_ps:
84 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
85 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
86 ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0
88 ; CHECK-FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
90 ; CHECK-FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
93 %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
96 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
98 define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
99 ; CHECK-LABEL: test_x86_fma_vfmadd_pd:
100 ; CHECK-NEXT: # BB#0:
102 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
103 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
104 ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0
106 ; CHECK-FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
108 ; CHECK-FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
111 %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
112 ret <2 x double> %res
114 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
116 define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
117 ; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
118 ; CHECK-NEXT: # BB#0:
120 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
121 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
122 ; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0
124 ; CHECK-FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
126 ; CHECK-FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
129 %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
132 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
134 define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
135 ; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
136 ; CHECK-NEXT: # BB#0:
138 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
139 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
140 ; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0
142 ; CHECK-FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
144 ; CHECK-FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
147 %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
148 ret <4 x double> %res
150 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
153 define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
154 ; CHECK-LABEL: test_x86_fma_vfmsub_ss:
155 ; CHECK-NEXT: # BB#0:
157 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
158 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
159 ; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0
161 ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0
163 ; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
166 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
170 define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
171 ; CHECK-LABEL: test_x86_fma_vfmsub_bac_ss:
172 ; CHECK-NEXT: # BB#0:
174 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
175 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
176 ; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0
178 ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1
179 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
181 ; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm0, %xmm1, %xmm0
184 %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
187 declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
189 define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
190 ; CHECK-LABEL: test_x86_fma_vfmsub_sd:
191 ; CHECK-NEXT: # BB#0:
193 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
194 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
195 ; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0
197 ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
199 ; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
202 %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
203 ret <2 x double> %res
206 define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
207 ; CHECK-LABEL: test_x86_fma_vfmsub_bac_sd:
208 ; CHECK-NEXT: # BB#0:
210 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
211 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
212 ; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0
214 ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
215 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
217 ; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm0, %xmm1, %xmm0
220 %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
221 ret <2 x double> %res
223 declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
225 define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
226 ; CHECK-LABEL: test_x86_fma_vfmsub_ps:
227 ; CHECK-NEXT: # BB#0:
229 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
230 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
231 ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0
233 ; CHECK-FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
235 ; CHECK-FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
238 %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
241 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
243 define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
244 ; CHECK-LABEL: test_x86_fma_vfmsub_pd:
245 ; CHECK-NEXT: # BB#0:
247 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
248 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
249 ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0
251 ; CHECK-FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
253 ; CHECK-FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
256 %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
257 ret <2 x double> %res
259 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
261 define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
262 ; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
263 ; CHECK-NEXT: # BB#0:
265 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
266 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
267 ; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0
269 ; CHECK-FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
271 ; CHECK-FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
274 %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
277 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
279 define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
280 ; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
281 ; CHECK-NEXT: # BB#0:
283 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
284 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
285 ; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0
287 ; CHECK-FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
289 ; CHECK-FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
292 %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
293 ret <4 x double> %res
295 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
298 define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
299 ; CHECK-LABEL: test_x86_fma_vfnmadd_ss:
300 ; CHECK-NEXT: # BB#0:
302 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
303 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
304 ; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0
306 ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
308 ; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
311 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
315 define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
316 ; CHECK-LABEL: test_x86_fma_vfnmadd_bac_ss:
317 ; CHECK-NEXT: # BB#0:
319 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
320 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
321 ; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0
323 ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1
324 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
326 ; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm0, %xmm1, %xmm0
329 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
332 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
334 define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
335 ; CHECK-LABEL: test_x86_fma_vfnmadd_sd:
336 ; CHECK-NEXT: # BB#0:
338 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
339 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
340 ; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0
342 ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
344 ; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
347 %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
348 ret <2 x double> %res
351 define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
352 ; CHECK-LABEL: test_x86_fma_vfnmadd_bac_sd:
353 ; CHECK-NEXT: # BB#0:
355 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
356 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
357 ; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0
359 ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1
360 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
362 ; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm0, %xmm1, %xmm0
365 %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
366 ret <2 x double> %res
368 declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
370 define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
371 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
372 ; CHECK-NEXT: # BB#0:
374 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
375 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
376 ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0
378 ; CHECK-FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
380 ; CHECK-FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
383 %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
386 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
388 define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
389 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
390 ; CHECK-NEXT: # BB#0:
392 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
393 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
394 ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0
396 ; CHECK-FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
398 ; CHECK-FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
401 %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
402 ret <2 x double> %res
404 declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
406 define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
407 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
408 ; CHECK-NEXT: # BB#0:
410 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
411 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
412 ; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0
414 ; CHECK-FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
416 ; CHECK-FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
419 %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
422 declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
424 define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
425 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
426 ; CHECK-NEXT: # BB#0:
428 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
429 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
430 ; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0
432 ; CHECK-FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
434 ; CHECK-FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
437 %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
438 ret <4 x double> %res
440 declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
443 define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
444 ; CHECK-LABEL: test_x86_fma_vfnmsub_ss:
445 ; CHECK-NEXT: # BB#0:
447 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
448 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
449 ; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0
451 ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
453 ; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
456 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
460 define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
461 ; CHECK-LABEL: test_x86_fma_vfnmsub_bac_ss:
462 ; CHECK-NEXT: # BB#0:
464 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
465 ; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
466 ; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0
468 ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1
469 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
471 ; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm0, %xmm1, %xmm0
474 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
477 declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
479 define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
480 ; CHECK-LABEL: test_x86_fma_vfnmsub_sd:
481 ; CHECK-NEXT: # BB#0:
483 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
484 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
485 ; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0
487 ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
489 ; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
492 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
493 ret <2 x double> %res
496 define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
497 ; CHECK-LABEL: test_x86_fma_vfnmsub_bac_sd:
498 ; CHECK-NEXT: # BB#0:
500 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
501 ; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
502 ; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0
504 ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
505 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
507 ; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm0, %xmm1, %xmm0
510 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
511 ret <2 x double> %res
513 declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
515 define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
516 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
517 ; CHECK-NEXT: # BB#0:
519 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
520 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
521 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0
523 ; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
525 ; CHECK-FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
528 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
531 declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
533 define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
534 ; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
535 ; CHECK-NEXT: # BB#0:
537 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
538 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
539 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0
541 ; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
543 ; CHECK-FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
546 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
547 ret <2 x double> %res
549 declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
551 define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
552 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
553 ; CHECK-NEXT: # BB#0:
555 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
556 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
557 ; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0
559 ; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
561 ; CHECK-FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
564 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
567 declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
569 define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
570 ; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
571 ; CHECK-NEXT: # BB#0:
573 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
574 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
575 ; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0
577 ; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
579 ; CHECK-FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
582 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
583 ret <4 x double> %res
585 declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
588 define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
589 ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
590 ; CHECK-NEXT: # BB#0:
592 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
593 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
594 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0
596 ; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
598 ; CHECK-FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
601 %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
604 declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
606 define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
607 ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
608 ; CHECK-NEXT: # BB#0:
610 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
611 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
612 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0
614 ; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
616 ; CHECK-FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
619 %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
620 ret <2 x double> %res
622 declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
624 define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
625 ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
626 ; CHECK-NEXT: # BB#0:
628 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
629 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
630 ; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0
632 ; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
634 ; CHECK-FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
637 %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
640 declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
642 define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
643 ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
644 ; CHECK-NEXT: # BB#0:
646 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
647 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
648 ; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0
650 ; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
652 ; CHECK-FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
655 %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
656 ret <4 x double> %res
658 declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
661 define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
662 ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
663 ; CHECK-NEXT: # BB#0:
665 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
666 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
667 ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0
669 ; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0
671 ; CHECK-FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
674 %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
677 declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
679 define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
680 ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
681 ; CHECK-NEXT: # BB#0:
683 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
684 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
685 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0
687 ; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0
689 ; CHECK-FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
692 %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
693 ret <2 x double> %res
695 declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
697 define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
698 ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
699 ; CHECK-NEXT: # BB#0:
701 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
702 ; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
703 ; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0
705 ; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0
707 ; CHECK-FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
710 %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
713 declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
715 define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
716 ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
717 ; CHECK-NEXT: # BB#0:
719 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
720 ; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
721 ; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0
723 ; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0
725 ; CHECK-FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
728 %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
729 ret <4 x double> %res
731 declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
733 attributes #0 = { nounwind }