; Code generation test for llvm.fma on scalar double and <2 x double> with VSX
; enabled on pwr7. Output is verified for the default pipeline (default check
; prefix) and for -fast-isel -O0 (the FISL check prefix).
1 ; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s
2 ; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s
4 ; Also run with -schedule-ppc-vsx-fma-mutation-early as a stress test for the
5 ; live-interval-updating logic.
; The output of this third invocation is not piped to FileCheck.
6 ; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -schedule-ppc-vsx-fma-mutation-early
7 target datalayout = "E-m:e-i64:64-n32:64"
8 target triple = "powerpc64-unknown-linux-gnu"
; test1: two scalar FMAs that share the multiplicand %b and the addend %a.
; The results are stored to d[0] and d[1].
; The default-prefix checks expect one xsmaddmdp and one xsmaddadp; the
; fast-isel checks expect an fmr copy of the addend followed by xsmaddadp.
10 define void @test1(double %a, double %b, double %c, double %e, double* nocapture %d) #0 {
12 %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
13 store double %0, double* %d, align 8
14 %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
15 %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
16 store double %1, double* %arrayidx1, align 8
; Anchor the default-prefix checks to this function, as the fast-isel section
; and the later tests in this file already do.
; CHECK-LABEL: @test1
20 ; CHECK-DAG: li [[C1:[0-9]+]], 8
21 ; CHECK-DAG: xsmaddmdp 3, 2, 1
22 ; CHECK-DAG: xsmaddadp 1, 2, 4
23 ; CHECK-DAG: stxsdx 3, 0, 7
24 ; CHECK-DAG: stxsdx 1, 7, [[C1]]
27 ; CHECK-FISL-LABEL: @test1
28 ; CHECK-FISL-DAG: fmr 0, 1
29 ; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
30 ; CHECK-FISL-DAG: stxsdx 0, 0, 7
31 ; CHECK-FISL-DAG: xsmaddadp 1, 2, 4
32 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
33 ; CHECK-FISL-DAG: stxsdx 1, 7, [[C1]]
; test2: three scalar FMAs that share the multiplicand %b and the addend %a.
; The results are stored to d[0], d[1] and d[2].
; Most default-prefix expectations are disabled (spelled CHECX) per the FIXME
; below; only the two li checks remain active for that prefix.
37 define void @test2(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
39 %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
40 store double %0, double* %d, align 8
41 %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
42 %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
43 store double %1, double* %arrayidx1, align 8
44 %2 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
45 %arrayidx2 = getelementptr inbounds double, double* %d, i64 2
46 store double %2, double* %arrayidx2, align 8
; Anchor the default-prefix checks to this function, as the fast-isel section
; and the later tests in this file already do.
; CHECK-LABEL: @test2
50 ; CHECK-DAG: li [[C1:[0-9]+]], 8
51 ; CHECK-DAG: li [[C2:[0-9]+]], 16
52 ; FIXME: We no longer get this because of copy ordering at the MI level.
53 ; CHECX-DAG: xsmaddmdp 3, 2, 1
54 ; CHECX-DAG: xsmaddmdp 4, 2, 1
55 ; CHECX-DAG: xsmaddadp 1, 2, 5
56 ; CHECX-DAG: stxsdx 3, 0, 8
57 ; CHECX-DAG: stxsdx 4, 8, [[C1]]
58 ; CHECX-DAG: stxsdx 1, 8, [[C2]]
61 ; CHECK-FISL-LABEL: @test2
62 ; CHECK-FISL-DAG: fmr 0, 1
63 ; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
64 ; CHECK-FISL-DAG: stxsdx 0, 0, 8
65 ; CHECK-FISL-DAG: fmr 0, 1
66 ; CHECK-FISL-DAG: xsmaddadp 0, 2, 4
67 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
68 ; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
69 ; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
70 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
71 ; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
; test3: four scalar FMAs. %0, %1 and %3 use %a as the addend; %2 uses the
; result %1 as its addend. %1 is also stored, after %2 and %3.
; Store destinations, in program order: d[0], d[3], d[2], d[1].
75 define void @test3(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
77 %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
78 store double %0, double* %d, align 8
79 %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
80 %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
81 %arrayidx1 = getelementptr inbounds double, double* %d, i64 3
82 store double %2, double* %arrayidx1, align 8
83 %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
84 %arrayidx2 = getelementptr inbounds double, double* %d, i64 2
85 store double %3, double* %arrayidx2, align 8
86 %arrayidx3 = getelementptr inbounds double, double* %d, i64 1
87 store double %1, double* %arrayidx3, align 8
; Anchor the default-prefix checks to this function, as the fast-isel section
; and the later tests in this file already do.
; CHECK-LABEL: @test3
91 ; CHECK-DAG: fmr [[F1:[0-9]+]], 1
92 ; CHECK-DAG: li [[C1:[0-9]+]], 24
93 ; CHECK-DAG: li [[C2:[0-9]+]], 16
94 ; CHECK-DAG: li [[C3:[0-9]+]], 8
95 ; CHECK-DAG: xsmaddmdp 4, 2, 1
96 ; CHECK-DAG: xsmaddadp 1, 2, 5
98 ; Note: We could convert this next FMA to M-type as well, but it would require
99 ; re-ordering the instructions.
100 ; CHECK-DAG: xsmaddadp [[F1]], 2, 3
102 ; CHECK-DAG: xsmaddmdp 3, 2, 4
103 ; CHECK-DAG: stxsdx [[F1]], 0, 8
104 ; CHECK-DAG: stxsdx 3, 8, [[C1]]
105 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
106 ; CHECK-DAG: stxsdx 4, 8, [[C3]]
109 ; CHECK-FISL-LABEL: @test3
110 ; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
111 ; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
112 ; CHECK-FISL-DAG: fmr 4, [[F1]]
113 ; CHECK-FISL-DAG: xsmaddadp 4, 2, 3
114 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
115 ; CHECK-FISL-DAG: stxsdx 4, 8, [[C1]]
116 ; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
117 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
118 ; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
119 ; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
120 ; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
; test4: the same four scalar FMAs as test3, but %1 is stored to d[1]
; before it is used as the addend of %2.
; Store destinations, in program order: d[0], d[1], d[3], d[2].
124 define void @test4(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
126 %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
127 store double %0, double* %d, align 8
128 %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
129 %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
130 store double %1, double* %arrayidx1, align 8
131 %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
132 %arrayidx3 = getelementptr inbounds double, double* %d, i64 3
133 store double %2, double* %arrayidx3, align 8
134 %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
135 %arrayidx4 = getelementptr inbounds double, double* %d, i64 2
136 store double %3, double* %arrayidx4, align 8
; Expected output of the default llc invocation.
139 ; CHECK-LABEL: @test4
140 ; CHECK-DAG: fmr [[F1:[0-9]+]], 1
141 ; CHECK-DAG: li [[C1:[0-9]+]], 8
142 ; CHECK-DAG: li [[C2:[0-9]+]], 16
143 ; CHECK-DAG: xsmaddmdp 4, 2, 1
145 ; Note: We could convert this next FMA to M-type as well, but it would require
146 ; re-ordering the instructions.
147 ; CHECK-DAG: xsmaddadp 1, 2, 5
149 ; CHECK-DAG: xsmaddadp [[F1]], 2, 3
150 ; CHECK-DAG: stxsdx [[F1]], 0, 8
151 ; CHECK-DAG: stxsdx 4, 8, [[C1]]
152 ; CHECK-DAG: li [[C3:[0-9]+]], 24
153 ; CHECK-DAG: xsmaddadp 4, 2, 3
154 ; CHECK-DAG: stxsdx 4, 8, [[C3]]
155 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
; Expected output of the -fast-isel -O0 invocation.
158 ; CHECK-FISL-LABEL: @test4
159 ; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
160 ; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 3
161 ; CHECK-FISL-DAG: stxsdx 0, 0, 8
162 ; CHECK-FISL-DAG: fmr [[F1]], 1
163 ; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
164 ; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
165 ; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
166 ; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
167 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
168 ; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
169 ; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
170 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
171 ; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
; Scalar double FMA intrinsic called by test1 through test4.
175 declare double @llvm.fma.f64(double, double, double) #0
; testv1: <2 x double> counterpart of test1. Two vector FMAs share the
; multiplicand %b and the addend %a; results are stored to d[0] and d[1].
177 define void @testv1(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double>* nocapture %d) #0 {
179 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
180 store <2 x double> %0, <2 x double>* %d, align 8
181 %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
182 %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
183 store <2 x double> %1, <2 x double>* %arrayidx1, align 8
186 ; CHECK-LABEL: @testv1
187 ; CHECK-DAG: xvmaddmdp 36, 35, 34
188 ; CHECK-DAG: xvmaddadp 34, 35, 37
189 ; CHECK-DAG: li [[C1:[0-9]+]], 16
190 ; CHECK-DAG: stxvd2x 36, 0, 3
; The store must use the register captured by the li above, so C1 is
; referenced here rather than captured again.
191 ; CHECK-DAG: stxvd2x 34, 3, [[C1]]
194 ; CHECK-FISL-LABEL: @testv1
195 ; CHECK-FISL-DAG: xxlor 0, 34, 34
196 ; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
197 ; CHECK-FISL-DAG: stxvd2x 0, 0, 3
198 ; CHECK-FISL-DAG: xvmaddadp 34, 35, 37
199 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
200 ; CHECK-FISL-DAG: stxvd2x 34, 3, [[C1]]
; testv2: <2 x double> counterpart of test2. Three vector FMAs share the
; multiplicand %b and the addend %a; results are stored to d[0], d[1], d[2].
; The default-prefix expectations are disabled (spelled CHECX) per the FIXME
; below. In both check groups the stores reference the offset registers
; captured by the li lines instead of capturing them again.
204 define void @testv2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
206 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
207 store <2 x double> %0, <2 x double>* %d, align 8
208 %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
209 %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
210 store <2 x double> %1, <2 x double>* %arrayidx1, align 8
211 %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
212 %arrayidx2 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
213 store <2 x double> %2, <2 x double>* %arrayidx2, align 8
216 ; CHECK-LABEL: @testv2
217 ; FIXME: We currently don't get this because of copy ordering on the MI level.
218 ; CHECX-DAG: xvmaddmdp 36, 35, 34
219 ; CHECX-DAG: xvmaddmdp 37, 35, 34
220 ; CHECX-DAG: li [[C1:[0-9]+]], 16
221 ; CHECX-DAG: li [[C2:[0-9]+]], 32
222 ; CHECX-DAG: xvmaddadp 34, 35, 38
223 ; CHECX-DAG: stxvd2x 36, 0, 3
224 ; CHECX-DAG: stxvd2x 37, 3, [[C1]]
225 ; CHECX-DAG: stxvd2x 34, 3, [[C2]]
228 ; CHECK-FISL-LABEL: @testv2
229 ; CHECK-FISL-DAG: xxlor 0, 34, 34
230 ; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
231 ; CHECK-FISL-DAG: stxvd2x 0, 0, 3
232 ; CHECK-FISL-DAG: xxlor 0, 34, 34
233 ; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
234 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
235 ; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1]]
236 ; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
237 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
238 ; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
; testv3: <2 x double> counterpart of test3. %0, %1 and %3 use %a as the
; addend; %2 uses the result %1 as its addend, and %1 is stored last.
; Store destinations, in program order: d[0], d[3], d[2], d[1].
242 define void @testv3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
244 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
245 store <2 x double> %0, <2 x double>* %d, align 8
246 %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
247 %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
248 %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 3
249 store <2 x double> %2, <2 x double>* %arrayidx1, align 8
250 %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
251 %arrayidx2 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
252 store <2 x double> %3, <2 x double>* %arrayidx2, align 8
253 %arrayidx3 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
254 store <2 x double> %1, <2 x double>* %arrayidx3, align 8
257 ; Note: There is some unavoidable changeability in this variant. If the
258 ; FMAs are reordered differently, the algorithm can pick a different
259 ; multiplicand to destroy, changing the register assignment. There isn't
260 ; a good way to express this possibility, so hopefully this doesn't change
; too often.
263 ; CHECK-LABEL: @testv3
264 ; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
265 ; CHECK-DAG: li [[C1:[0-9]+]], 48
266 ; CHECK-DAG: li [[C2:[0-9]+]], 32
267 ; CHECK-DAG: xvmaddmdp 37, 35, 34
268 ; CHECK-DAG: li [[C3:[0-9]+]], 16
270 ; Note: We could convert this next FMA to M-type as well, but it would require
271 ; re-ordering the instructions.
272 ; CHECK-DAG: xvmaddadp [[V1]], 35, 36
274 ; CHECK-DAG: xvmaddmdp 36, 35, 37
275 ; CHECK-DAG: xvmaddadp 34, 35, 38
276 ; CHECK-DAG: stxvd2x 32, 0, 3
277 ; CHECK-DAG: stxvd2x 36, 3, [[C1]]
278 ; CHECK-DAG: stxvd2x 34, 3, [[C2]]
279 ; CHECK-DAG: stxvd2x 37, 3, [[C3]]
; Expected output of the -fast-isel -O0 invocation.
282 ; CHECK-FISL-LABEL: @testv3
283 ; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
284 ; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
285 ; CHECK-FISL-DAG: stxvd2x [[V1]], 0, 3
286 ; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
287 ; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
288 ; CHECK-FISL-DAG: xxlor [[V3:[0-9]+]], 0, 0
289 ; CHECK-FISL-DAG: xvmaddadp [[V3]], 35, 36
290 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 48
291 ; CHECK-FISL-DAG: stxvd2x [[V3]], 3, [[C1]]
292 ; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
293 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
294 ; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
295 ; CHECK-FISL-DAG: li [[C3:[0-9]+]], 16
296 ; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
; testv4: <2 x double> counterpart of test4. Same FMAs as testv3, but %1 is
; stored to d[1] before it is used as the addend of %2.
; Store destinations, in program order: d[0], d[1], d[3], d[2].
300 define void @testv4(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
302 %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
303 store <2 x double> %0, <2 x double>* %d, align 8
304 %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
305 %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
306 store <2 x double> %1, <2 x double>* %arrayidx1, align 8
307 %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
308 %arrayidx3 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 3
309 store <2 x double> %2, <2 x double>* %arrayidx3, align 8
310 %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
311 %arrayidx4 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
312 store <2 x double> %3, <2 x double>* %arrayidx4, align 8
; Expected output of the default llc invocation.
315 ; CHECK-LABEL: @testv4
316 ; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
317 ; CHECK-DAG: xvmaddmdp 37, 35, 34
318 ; CHECK-DAG: li [[C1:[0-9]+]], 16
319 ; CHECK-DAG: li [[C2:[0-9]+]], 32
320 ; CHECK-DAG: xvmaddadp 34, 35, 38
322 ; Note: We could convert this next FMA to M-type as well, but it would require
323 ; re-ordering the instructions.
324 ; CHECK-DAG: xvmaddadp [[V1]], 35, 36
326 ; CHECK-DAG: stxvd2x 32, 0, 3
327 ; CHECK-DAG: stxvd2x 37, 3, [[C1]]
328 ; CHECK-DAG: li [[C3:[0-9]+]], 48
329 ; CHECK-DAG: xvmaddadp 37, 35, 36
330 ; CHECK-DAG: stxvd2x 37, 3, [[C3]]
331 ; CHECK-DAG: stxvd2x 34, 3, [[C2]]
; Expected output of the -fast-isel -O0 invocation.
334 ; CHECK-FISL-LABEL: @testv4
335 ; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
336 ; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
337 ; CHECK-FISL-DAG: stxvd2x 0, 0, 3
338 ; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
339 ; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
340 ; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
341 ; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1]]
342 ; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
343 ; CHECK-FISL-DAG: li [[C3:[0-9]+]], 48
344 ; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
345 ; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
346 ; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
347 ; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
; <2 x double> FMA intrinsic called by testv1 through testv4.
351 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
; Attribute group #0 is referenced by every define and declare visible in this file.
353 attributes #0 = { nounwind readnone }