1 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
2 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
4 ; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
5 ; our test strategy is to:
6 ; * Force the pass to always perform register swapping even if the dest register is of the
7 ; correct color already (-force-all)
8 ; * Force the pass to ignore all hints it obtained from regalloc (-aarch64-a57-fp-load-balancing-override),
9 ; and run it twice, once where it always hints even (=1), and once where it always hints odd (=2).
11 ; We then use regex magic to check that in the two cases the register allocation is
12 ; different; this is what gives us the testing coverage and distinguishes cases where
13 ; the pass has done some work versus accidental regalloc.
; Module-level target settings: an explicit data layout string (leading 'e' is
; little-endian per the LLVM LangRef) and a generic AArch64 triple. The CPU is
; chosen on the llc command line, not here.
15 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
16 target triple = "aarch64"
18 ; Non-overlapping groups - shouldn't need any changing at all.
21 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
22 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; f1: loads p[0..4], folds them through a serial fmul/fadd/fsub chain and stores
; the result to q[0]; then loads p[5..7], builds a second shorter chain (which
; also reuses p[0]) and stores it to q[1]. In IR order the first store comes
; before any of the second chain's loads, so the two chains do not interleave.
28 define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
30 %0 = load double* %p, align 8
31 %arrayidx1 = getelementptr inbounds double* %p, i64 1
32 %1 = load double* %arrayidx1, align 8
33 %arrayidx2 = getelementptr inbounds double* %p, i64 2
34 %2 = load double* %arrayidx2, align 8
35 %arrayidx3 = getelementptr inbounds double* %p, i64 3
36 %3 = load double* %arrayidx3, align 8
37 %arrayidx4 = getelementptr inbounds double* %p, i64 4
38 %4 = load double* %arrayidx4, align 8
; First chain: every fmul feeds the fadd/fsub directly after it, and each
; fadd/fsub also consumes the previous accumulator, ending in %add9.
39 %mul = fmul fast double %0, %1
40 %add = fadd fast double %mul, %4
41 %mul5 = fmul fast double %1, %2
42 %add6 = fadd fast double %mul5, %add
43 %mul7 = fmul fast double %1, %3
44 %sub = fsub fast double %add6, %mul7
45 %mul8 = fmul fast double %2, %3
46 %add9 = fadd fast double %mul8, %sub
47 store double %add9, double* %q, align 8
48 %arrayidx11 = getelementptr inbounds double* %p, i64 5
49 %5 = load double* %arrayidx11, align 8
50 %arrayidx12 = getelementptr inbounds double* %p, i64 6
51 %6 = load double* %arrayidx12, align 8
52 %arrayidx13 = getelementptr inbounds double* %p, i64 7
53 %7 = load double* %arrayidx13, align 8
; Second chain: %mul15 seeds the accumulator, then two multiply-then-add steps
; produce %add19, which is written to q[1].
54 %mul15 = fmul fast double %6, %7
55 %mul16 = fmul fast double %0, %5
56 %add17 = fadd fast double %mul16, %mul15
57 %mul18 = fmul fast double %5, %6
58 %add19 = fadd fast double %mul18, %add17
59 %arrayidx20 = getelementptr inbounds double* %q, i64 1
60 store double %add19, double* %arrayidx20, align 8
64 ; Overlapping groups - coloring needed.
67 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
68 ; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
69 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
70 ; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
76 ; CHECK: stp [[x]], [[y]]
; f2: loads p[0..7] up front, then computes two accumulation chains whose
; instructions are interleaved in IR order. The chains share input loads but no
; intermediate value of one feeds the other. %add17 is stored to q[0] and
; %add15 to q[1].
78 define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
80 %0 = load double* %p, align 8
81 %arrayidx1 = getelementptr inbounds double* %p, i64 1
82 %1 = load double* %arrayidx1, align 8
83 %arrayidx2 = getelementptr inbounds double* %p, i64 2
84 %2 = load double* %arrayidx2, align 8
85 %arrayidx3 = getelementptr inbounds double* %p, i64 3
86 %3 = load double* %arrayidx3, align 8
87 %arrayidx4 = getelementptr inbounds double* %p, i64 4
88 %4 = load double* %arrayidx4, align 8
89 %arrayidx5 = getelementptr inbounds double* %p, i64 5
90 %5 = load double* %arrayidx5, align 8
91 %arrayidx6 = getelementptr inbounds double* %p, i64 6
92 %6 = load double* %arrayidx6, align 8
93 %arrayidx7 = getelementptr inbounds double* %p, i64 7
94 %7 = load double* %arrayidx7, align 8
; Chain A (to q[0]): %mul, %add, %mul9, %add10, %mul13, %sub, %mul16, %add17.
; Chain B (to q[1]): %mul8, %mul11, %add12, %mul14, %add15.
95 %mul = fmul fast double %0, %1
96 %add = fadd fast double %mul, %7
97 %mul8 = fmul fast double %5, %6
98 %mul9 = fmul fast double %1, %2
99 %add10 = fadd fast double %mul9, %add
100 %mul11 = fmul fast double %3, %4
101 %add12 = fadd fast double %mul11, %mul8
102 %mul13 = fmul fast double %1, %3
103 %sub = fsub fast double %add10, %mul13
104 %mul14 = fmul fast double %4, %5
105 %add15 = fadd fast double %mul14, %add12
106 %mul16 = fmul fast double %2, %3
107 %add17 = fadd fast double %mul16, %sub
108 store double %add17, double* %q, align 8
109 %arrayidx19 = getelementptr inbounds double* %q, i64 1
110 store double %add15, double* %arrayidx19, align 8
114 ; Dest register is live on block exit - fixup needed.
117 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
118 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
121 ; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
; f3: builds one serial fmul/fadd/fsub chain from p[0..4] ending in %add9, then
; branches on p[3] == 0.0. The store of %add9 to q[0] sits in if.end, after the
; optional call to @g, so %add9 is used in a different block from the one that
; computes it.
124 define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
126 %0 = load double* %p, align 8
127 %arrayidx1 = getelementptr inbounds double* %p, i64 1
128 %1 = load double* %arrayidx1, align 8
129 %arrayidx2 = getelementptr inbounds double* %p, i64 2
130 %2 = load double* %arrayidx2, align 8
131 %arrayidx3 = getelementptr inbounds double* %p, i64 3
132 %3 = load double* %arrayidx3, align 8
133 %arrayidx4 = getelementptr inbounds double* %p, i64 4
134 %4 = load double* %arrayidx4, align 8
135 %mul = fmul fast double %0, %1
136 %add = fadd fast double %mul, %4
137 %mul5 = fmul fast double %1, %2
138 %add6 = fadd fast double %mul5, %add
139 %mul7 = fmul fast double %1, %3
140 %sub = fsub fast double %add6, %mul7
141 %mul8 = fmul fast double %2, %3
142 %add9 = fadd fast double %mul8, %sub
; A true comparison goes to if.then (which calls @g); false goes straight to
; if.end.
143 %cmp = fcmp oeq double %3, 0.000000e+00
144 br i1 %cmp, label %if.then, label %if.end
146 if.then: ; preds = %entry
147 tail call void bitcast (void (...)* @g to void ()*)() #2
150 if.end: ; preds = %if.then, %entry
; The chain result computed before the branch is consumed here.
151 store double %add9, double* %q, align 8
; External callee with no body in this file. f3 and f5 call it with no
; arguments through a bitcast of its variadic prototype to void ().
155 declare void @g(...) #1
157 ; Single precision version of f2.
160 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
161 ; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
162 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
163 ; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
169 ; CHECK: stp [[x]], [[y]]
; f4: float counterpart of f2. Loads p[0..7] (4-byte aligned), then computes two
; accumulation chains interleaved in IR order that share input loads but no
; intermediate values. %add17 is stored to q[0] and %add15 to q[1].
171 define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
173 %0 = load float* %p, align 4
174 %arrayidx1 = getelementptr inbounds float* %p, i64 1
175 %1 = load float* %arrayidx1, align 4
176 %arrayidx2 = getelementptr inbounds float* %p, i64 2
177 %2 = load float* %arrayidx2, align 4
178 %arrayidx3 = getelementptr inbounds float* %p, i64 3
179 %3 = load float* %arrayidx3, align 4
180 %arrayidx4 = getelementptr inbounds float* %p, i64 4
181 %4 = load float* %arrayidx4, align 4
182 %arrayidx5 = getelementptr inbounds float* %p, i64 5
183 %5 = load float* %arrayidx5, align 4
184 %arrayidx6 = getelementptr inbounds float* %p, i64 6
185 %6 = load float* %arrayidx6, align 4
186 %arrayidx7 = getelementptr inbounds float* %p, i64 7
187 %7 = load float* %arrayidx7, align 4
; Chain A (to q[0]): %mul, %add, %mul9, %add10, %mul13, %sub, %mul16, %add17.
; Chain B (to q[1]): %mul8, %mul11, %add12, %mul14, %add15.
188 %mul = fmul fast float %0, %1
189 %add = fadd fast float %mul, %7
190 %mul8 = fmul fast float %5, %6
191 %mul9 = fmul fast float %1, %2
192 %add10 = fadd fast float %mul9, %add
193 %mul11 = fmul fast float %3, %4
194 %add12 = fadd fast float %mul11, %mul8
195 %mul13 = fmul fast float %1, %3
196 %sub = fsub fast float %add10, %mul13
197 %mul14 = fmul fast float %4, %5
198 %add15 = fadd fast float %mul14, %add12
199 %mul16 = fmul fast float %2, %3
200 %add17 = fadd fast float %mul16, %sub
201 store float %add17, float* %q, align 4
202 %arrayidx19 = getelementptr inbounds float* %q, i64 1
203 store float %add15, float* %arrayidx19, align 4
207 ; Single precision version of f3.
210 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
211 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
214 ; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
; f5: float counterpart of f3. Builds one serial chain from p[0..4] ending in
; %add9, branches on p[3] == 0.0, and stores %add9 to q[0] only in if.end, after
; the optional call to @g.
217 define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
219 %0 = load float* %p, align 4
220 %arrayidx1 = getelementptr inbounds float* %p, i64 1
221 %1 = load float* %arrayidx1, align 4
222 %arrayidx2 = getelementptr inbounds float* %p, i64 2
223 %2 = load float* %arrayidx2, align 4
224 %arrayidx3 = getelementptr inbounds float* %p, i64 3
225 %3 = load float* %arrayidx3, align 4
226 %arrayidx4 = getelementptr inbounds float* %p, i64 4
227 %4 = load float* %arrayidx4, align 4
228 %mul = fmul fast float %0, %1
229 %add = fadd fast float %mul, %4
230 %mul5 = fmul fast float %1, %2
231 %add6 = fadd fast float %mul5, %add
232 %mul7 = fmul fast float %1, %3
233 %sub = fsub fast float %add6, %mul7
234 %mul8 = fmul fast float %2, %3
235 %add9 = fadd fast float %mul8, %sub
; A true comparison goes to if.then (which calls @g); false goes straight to
; if.end.
236 %cmp = fcmp oeq float %3, 0.000000e+00
237 br i1 %cmp, label %if.then, label %if.end
239 if.then: ; preds = %entry
240 tail call void bitcast (void (...)* @g to void ()*)() #2
243 if.end: ; preds = %if.then, %entry
; The chain result computed before the branch is consumed here.
244 store float %add9, float* %q, align 4
248 ; Test that regmask clobbering stops a chain sequence.
251 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
252 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
255 ; CHECK: fmadd d0, {{.*}}, [[x]]
; f6: builds one serial fmul/fadd/fsub chain from p[0..4] ending in %add9,
; passes %add9 as the only argument of @hh, and stores the value returned by the
; call to q[0].
259 define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
261 %0 = load double* %p, align 8
262 %arrayidx1 = getelementptr inbounds double* %p, i64 1
263 %1 = load double* %arrayidx1, align 8
264 %arrayidx2 = getelementptr inbounds double* %p, i64 2
265 %2 = load double* %arrayidx2, align 8
266 %arrayidx3 = getelementptr inbounds double* %p, i64 3
267 %3 = load double* %arrayidx3, align 8
268 %arrayidx4 = getelementptr inbounds double* %p, i64 4
269 %4 = load double* %arrayidx4, align 8
270 %mul = fmul fast double %0, %1
271 %add = fadd fast double %mul, %4
272 %mul5 = fmul fast double %1, %2
273 %add6 = fadd fast double %mul5, %add
274 %mul7 = fmul fast double %1, %3
275 %sub = fsub fast double %add6, %mul7
276 %mul8 = fmul fast double %2, %3
277 %add9 = fadd fast double %mul8, %sub
; The chain's final value is the call argument; what is stored is the call's
; result, not %add9 itself.
278 %call = tail call double @hh(double %add9) #2
279 store double %call, double* %q, align 8
; External callee with no body in this file. f6 passes its chain result to it
; and stores the returned double.
283 declare double @hh(double) #1
285 ; Check that we correctly deal with repeated operands.
286 ; The following testcase creates:
287 ; %D1<def> = FADDDrr %D0<kill>, %D0
288 ; We'll get a crash if we naively look at the first operand, remove it
289 ; from the substitution list then look at the second operand.
291 ; CHECK: fmadd [[x:d[0-9]+]]
292 ; CHECK: fadd d1, [[x]], [[x]]
; f7: builds one serial fmul/fadd/fsub chain from p[0..4] ending in %add9, then
; adds %add9 to itself in a single fadd (the same value appears as both
; operands) and passes the sum as the second argument of @hhh, with the constant
; 0.0 as the first. %q is not written in the lines visible here.
294 define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
296 %0 = load double* %p, align 8
297 %arrayidx1 = getelementptr inbounds double* %p, i64 1
298 %1 = load double* %arrayidx1, align 8
299 %arrayidx2 = getelementptr inbounds double* %p, i64 2
300 %2 = load double* %arrayidx2, align 8
301 %arrayidx3 = getelementptr inbounds double* %p, i64 3
302 %3 = load double* %arrayidx3, align 8
303 %arrayidx4 = getelementptr inbounds double* %p, i64 4
304 %4 = load double* %arrayidx4, align 8
305 %mul = fmul fast double %0, %1
306 %add = fadd fast double %mul, %4
307 %mul5 = fmul fast double %1, %2
308 %add6 = fadd fast double %mul5, %add
309 %mul7 = fmul fast double %1, %3
310 %sub = fsub fast double %add6, %mul7
311 %mul8 = fmul fast double %2, %3
312 %add9 = fadd fast double %mul8, %sub
; Repeated operand: both inputs of this fadd are the chain result.
313 %add10 = fadd fast double %add9, %add9
314 call void @hhh(double 0.0, double %add10)
; External callee with no body in this file. f7 calls it with the constant 0.0
; and %add10. Unlike @g and @hh, it carries no attribute group.
318 declare void @hhh(double, double)
; Attribute groups as used in the lines visible here: #0 is attached to every
; function definition (f1-f7), #1 to the @g and @hh declarations, and #2 to the
; call sites of @g and @hh. #0 and #1 differ only in that #0 adds nounwind.
320 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
321 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
322 attributes #2 = { nounwind }