test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll

   1 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
   2 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
   3
   4 ; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
   5 ; our test strategy is to:
   6 ;   * Force the pass to always perform register swapping even if the dest register is of the
   7 ;     correct color already (-aarch64-a57-fp-load-balancing-force-all)
   8 ;   * Force the pass to ignore all hints it obtained from regalloc (-aarch64-a57-fp-load-balancing-override),
   9 ;     and run it twice, once where it always hints odd (=2), and once where it always hints even (=1).
  10 ;
  11 ; We then use regex magic to check that in the two cases the register allocation is
  12 ; different; this is what gives us the testing coverage and distinguishes cases where
  13 ; the pass has done some work versus accidental regalloc.
  14
  15 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
  16 target triple = "aarch64"
  17
  18 ; Non-overlapping groups - shouldn't need any changing at all.
  19
  20 ; CHECK-LABEL: f1:
  21 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
  22 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
  23 ; CHECK: fmadd [[x]]
  24 ; CHECK: fmsub [[x]]
  25 ; CHECK: fmadd [[x]]
  26 ; CHECK: str [[x]]
  27
  28 define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
  29 entry:                                            ; two multiply/add chains, one after the other; results go to q[0] and q[1]
  30   %0 = load double* %p, align 8                   ; p[0]
  31   %arrayidx1 = getelementptr inbounds double* %p, i64 1
  32   %1 = load double* %arrayidx1, align 8           ; p[1]
  33   %arrayidx2 = getelementptr inbounds double* %p, i64 2
  34   %2 = load double* %arrayidx2, align 8           ; p[2]
  35   %arrayidx3 = getelementptr inbounds double* %p, i64 3
  36   %3 = load double* %arrayidx3, align 8           ; p[3]
  37   %arrayidx4 = getelementptr inbounds double* %p, i64 4
  38   %4 = load double* %arrayidx4, align 8           ; p[4]
  39   %mul = fmul fast double %0, %1                  ; first chain starts: p[0]*p[1]
  40   %add = fadd fast double %mul, %4                ;   + p[4]
  41   %mul5 = fmul fast double %1, %2
  42   %add6 = fadd fast double %mul5, %add            ;   + p[1]*p[2]
  43   %mul7 = fmul fast double %1, %3
  44   %sub = fsub fast double %add6, %mul7            ;   - p[1]*p[3]
  45   %mul8 = fmul fast double %2, %3
  46   %add9 = fadd fast double %mul8, %sub            ;   + p[2]*p[3]
  47   store double %add9, double* %q, align 8         ; first chain ends: q[0] = %add9
  48   %arrayidx11 = getelementptr inbounds double* %p, i64 5
  49   %5 = load double* %arrayidx11, align 8          ; p[5]
  50   %arrayidx12 = getelementptr inbounds double* %p, i64 6
  51   %6 = load double* %arrayidx12, align 8          ; p[6]
  52   %arrayidx13 = getelementptr inbounds double* %p, i64 7
  53   %7 = load double* %arrayidx13, align 8          ; p[7]
  54   %mul15 = fmul fast double %6, %7                ; second chain starts only after the first was stored
  55   %mul16 = fmul fast double %0, %5
  56   %add17 = fadd fast double %mul16, %mul15        ;   p[0]*p[5] + p[6]*p[7]
  57   %mul18 = fmul fast double %5, %6
  58   %add19 = fadd fast double %mul18, %add17        ;   + p[5]*p[6]
  59   %arrayidx20 = getelementptr inbounds double* %q, i64 1
  60   store double %add19, double* %arrayidx20, align 8 ; second chain ends: q[1] = %add19
  61   ret void
  62 }
  63
  64 ; Overlapping groups - coloring needed.
  65
  66 ; CHECK-LABEL: f2:
  67 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
  68 ; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
  69 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
  70 ; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
  71 ; CHECK: fmadd [[x]]
  72 ; CHECK: fmadd [[y]]
  73 ; CHECK: fmsub [[x]]
  74 ; CHECK: fmadd [[y]]
  75 ; CHECK: fmadd [[x]]
  76 ; CHECK: stp [[x]], [[y]]
  77
  78 define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
  79 entry:                                            ; two multiply/add chains (A -> q[0], B -> q[1]) whose instructions are interleaved
  80   %0 = load double* %p, align 8                   ; p[0]
  81   %arrayidx1 = getelementptr inbounds double* %p, i64 1
  82   %1 = load double* %arrayidx1, align 8           ; p[1]
  83   %arrayidx2 = getelementptr inbounds double* %p, i64 2
  84   %2 = load double* %arrayidx2, align 8           ; p[2]
  85   %arrayidx3 = getelementptr inbounds double* %p, i64 3
  86   %3 = load double* %arrayidx3, align 8           ; p[3]
  87   %arrayidx4 = getelementptr inbounds double* %p, i64 4
  88   %4 = load double* %arrayidx4, align 8           ; p[4]
  89   %arrayidx5 = getelementptr inbounds double* %p, i64 5
  90   %5 = load double* %arrayidx5, align 8           ; p[5]
  91   %arrayidx6 = getelementptr inbounds double* %p, i64 6
  92   %6 = load double* %arrayidx6, align 8           ; p[6]
  93   %arrayidx7 = getelementptr inbounds double* %p, i64 7
  94   %7 = load double* %arrayidx7, align 8           ; p[7]
  95   %mul = fmul fast double %0, %1                  ; A: p[0]*p[1]
  96   %add = fadd fast double %mul, %7                ; A:   + p[7]
  97   %mul8 = fmul fast double %5, %6                 ; B: p[5]*p[6]
  98   %mul9 = fmul fast double %1, %2
  99   %add10 = fadd fast double %mul9, %add           ; A:   + p[1]*p[2]
 100   %mul11 = fmul fast double %3, %4
 101   %add12 = fadd fast double %mul11, %mul8         ; B:   + p[3]*p[4]
 102   %mul13 = fmul fast double %1, %3
 103   %sub = fsub fast double %add10, %mul13          ; A:   - p[1]*p[3]
 104   %mul14 = fmul fast double %4, %5
 105   %add15 = fadd fast double %mul14, %add12        ; B:   + p[4]*p[5]  (final value of B)
 106   %mul16 = fmul fast double %2, %3
 107   %add17 = fadd fast double %mul16, %sub          ; A:   + p[2]*p[3]  (final value of A)
 108   store double %add17, double* %q, align 8        ; q[0] = A
 109   %arrayidx19 = getelementptr inbounds double* %q, i64 1
 110   store double %add15, double* %arrayidx19, align 8 ; q[1] = B
 111   ret void
 112 }
 113
 114 ; Dest register is live on block exit - fixup needed.
 115
 116 ; CHECK-LABEL: f3:
 117 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
 118 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
 119 ; CHECK: fmadd [[x]]
 120 ; CHECK: fmsub [[x]]
 121 ; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
 122 ; CHECK: str [[y]]
 123
 124 define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
 125 entry:                                            ; one multiply/add chain whose result is consumed in a later block (if.end)
 126   %0 = load double* %p, align 8                   ; p[0]
 127   %arrayidx1 = getelementptr inbounds double* %p, i64 1
 128   %1 = load double* %arrayidx1, align 8           ; p[1]
 129   %arrayidx2 = getelementptr inbounds double* %p, i64 2
 130   %2 = load double* %arrayidx2, align 8           ; p[2]
 131   %arrayidx3 = getelementptr inbounds double* %p, i64 3
 132   %3 = load double* %arrayidx3, align 8           ; p[3]
 133   %arrayidx4 = getelementptr inbounds double* %p, i64 4
 134   %4 = load double* %arrayidx4, align 8           ; p[4]
 135   %mul = fmul fast double %0, %1                  ; chain starts: p[0]*p[1]
 136   %add = fadd fast double %mul, %4                ;   + p[4]
 137   %mul5 = fmul fast double %1, %2
 138   %add6 = fadd fast double %mul5, %add            ;   + p[1]*p[2]
 139   %mul7 = fmul fast double %1, %3
 140   %sub = fsub fast double %add6, %mul7            ;   - p[1]*p[3]
 141   %mul8 = fmul fast double %2, %3
 142   %add9 = fadd fast double %mul8, %sub            ;   + p[2]*p[3]; not stored in this block
 143   %cmp = fcmp oeq double %3, 0.000000e+00         ; ordered compare: p[3] == 0.0
 144   br i1 %cmp, label %if.then, label %if.end       ; call @g only when the compare is true
 145
 146 if.then:                                          ; preds = %entry
 147   tail call void bitcast (void (...)* @g to void ()*)() #2
 148   br label %if.end
 149
 150 if.end:                                           ; preds = %if.then, %entry
 151   store double %add9, double* %q, align 8         ; %add9 from entry is used here, after the optional call
 152   ret void
 153 }
 154
 155 declare void @g(...) #1                           ; external function; f3 and f5 call it through a bitcast to void ()*
 156
 157 ; Single precision version of f2.
 158
 159 ; CHECK-LABEL: f4:
 160 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
 161 ; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
 162 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
 163 ; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
 164 ; CHECK: fmadd [[x]]
 165 ; CHECK: fmadd [[y]]
 166 ; CHECK: fmsub [[x]]
 167 ; CHECK: fmadd [[y]]
 168 ; CHECK: fmadd [[x]]
 169 ; CHECK: stp [[x]], [[y]]
 170
 171 define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
 172 entry:                                            ; same shape as f2 on float: chains A -> q[0] and B -> q[1], interleaved
 173   %0 = load float* %p, align 4                    ; p[0]
 174   %arrayidx1 = getelementptr inbounds float* %p, i64 1
 175   %1 = load float* %arrayidx1, align 4            ; p[1]
 176   %arrayidx2 = getelementptr inbounds float* %p, i64 2
 177   %2 = load float* %arrayidx2, align 4            ; p[2]
 178   %arrayidx3 = getelementptr inbounds float* %p, i64 3
 179   %3 = load float* %arrayidx3, align 4            ; p[3]
 180   %arrayidx4 = getelementptr inbounds float* %p, i64 4
 181   %4 = load float* %arrayidx4, align 4            ; p[4]
 182   %arrayidx5 = getelementptr inbounds float* %p, i64 5
 183   %5 = load float* %arrayidx5, align 4            ; p[5]
 184   %arrayidx6 = getelementptr inbounds float* %p, i64 6
 185   %6 = load float* %arrayidx6, align 4            ; p[6]
 186   %arrayidx7 = getelementptr inbounds float* %p, i64 7
 187   %7 = load float* %arrayidx7, align 4            ; p[7]
 188   %mul = fmul fast float %0, %1                   ; A: p[0]*p[1]
 189   %add = fadd fast float %mul, %7                 ; A:   + p[7]
 190   %mul8 = fmul fast float %5, %6                  ; B: p[5]*p[6]
 191   %mul9 = fmul fast float %1, %2
 192   %add10 = fadd fast float %mul9, %add            ; A:   + p[1]*p[2]
 193   %mul11 = fmul fast float %3, %4
 194   %add12 = fadd fast float %mul11, %mul8          ; B:   + p[3]*p[4]
 195   %mul13 = fmul fast float %1, %3
 196   %sub = fsub fast float %add10, %mul13           ; A:   - p[1]*p[3]
 197   %mul14 = fmul fast float %4, %5
 198   %add15 = fadd fast float %mul14, %add12         ; B:   + p[4]*p[5]  (final value of B)
 199   %mul16 = fmul fast float %2, %3
 200   %add17 = fadd fast float %mul16, %sub           ; A:   + p[2]*p[3]  (final value of A)
 201   store float %add17, float* %q, align 4          ; q[0] = A
 202   %arrayidx19 = getelementptr inbounds float* %q, i64 1
 203   store float %add15, float* %arrayidx19, align 4 ; q[1] = B
 204   ret void
 205 }
 206
 207 ; Single precision version of f3
 208
 209 ; CHECK-LABEL: f5:
 210 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
 211 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
 212 ; CHECK: fmadd [[x]]
 213 ; CHECK: fmsub [[x]]
 214 ; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
 215 ; CHECK: str [[y]]
 216
 217 define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
 218 entry:                                            ; same shape as f3 on float: chain result is consumed in if.end
 219   %0 = load float* %p, align 4                    ; p[0]
 220   %arrayidx1 = getelementptr inbounds float* %p, i64 1
 221   %1 = load float* %arrayidx1, align 4            ; p[1]
 222   %arrayidx2 = getelementptr inbounds float* %p, i64 2
 223   %2 = load float* %arrayidx2, align 4            ; p[2]
 224   %arrayidx3 = getelementptr inbounds float* %p, i64 3
 225   %3 = load float* %arrayidx3, align 4            ; p[3]
 226   %arrayidx4 = getelementptr inbounds float* %p, i64 4
 227   %4 = load float* %arrayidx4, align 4            ; p[4]
 228   %mul = fmul fast float %0, %1                   ; chain starts: p[0]*p[1]
 229   %add = fadd fast float %mul, %4                 ;   + p[4]
 230   %mul5 = fmul fast float %1, %2
 231   %add6 = fadd fast float %mul5, %add             ;   + p[1]*p[2]
 232   %mul7 = fmul fast float %1, %3
 233   %sub = fsub fast float %add6, %mul7             ;   - p[1]*p[3]
 234   %mul8 = fmul fast float %2, %3
 235   %add9 = fadd fast float %mul8, %sub             ;   + p[2]*p[3]; not stored in this block
 236   %cmp = fcmp oeq float %3, 0.000000e+00          ; ordered compare: p[3] == 0.0
 237   br i1 %cmp, label %if.then, label %if.end       ; call @g only when the compare is true
 238
 239 if.then:                                          ; preds = %entry
 240   tail call void bitcast (void (...)* @g to void ()*)() #2
 241   br label %if.end
 242
 243 if.end:                                           ; preds = %if.then, %entry
 244   store float %add9, float* %q, align 4           ; %add9 from entry is used here, after the optional call
 245   ret void
 246 }
 247
 248 ; Test that regmask clobbering stops a chain sequence.
 249
 250 ; CHECK-LABEL: f6:
 251 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
 252 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
 253 ; CHECK: fmadd [[x]]
 254 ; CHECK: fmsub [[x]]
 255 ; CHECK: fmadd d0, {{.*}}, [[x]]
 256 ; CHECK: bl hh
 257 ; CHECK: str d0
 258
 259 define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
 260 entry:                                            ; one multiply/add chain whose result is passed to a call; the call's return value is stored
 261   %0 = load double* %p, align 8                   ; p[0]
 262   %arrayidx1 = getelementptr inbounds double* %p, i64 1
 263   %1 = load double* %arrayidx1, align 8           ; p[1]
 264   %arrayidx2 = getelementptr inbounds double* %p, i64 2
 265   %2 = load double* %arrayidx2, align 8           ; p[2]
 266   %arrayidx3 = getelementptr inbounds double* %p, i64 3
 267   %3 = load double* %arrayidx3, align 8           ; p[3]
 268   %arrayidx4 = getelementptr inbounds double* %p, i64 4
 269   %4 = load double* %arrayidx4, align 8           ; p[4]
 270   %mul = fmul fast double %0, %1                  ; chain starts: p[0]*p[1]
 271   %add = fadd fast double %mul, %4                ;   + p[4]
 272   %mul5 = fmul fast double %1, %2
 273   %add6 = fadd fast double %mul5, %add            ;   + p[1]*p[2]
 274   %mul7 = fmul fast double %1, %3
 275   %sub = fsub fast double %add6, %mul7            ;   - p[1]*p[3]
 276   %mul8 = fmul fast double %2, %3
 277   %add9 = fadd fast double %mul8, %sub            ;   + p[2]*p[3]
 278   %call = tail call double @hh(double %add9) #2   ; chain result is the sole argument of @hh
 279   store double %call, double* %q, align 8         ; q[0] = value returned by @hh, not %add9
 280   ret void
 281 }
 282
 283 declare double @hh(double) #1                     ; external function; f6 passes it the chain result and stores what it returns
 284
 285 ; Check that we correctly deal with repeated operands.
 286 ; The following testcase creates:
 287 ;   %D1<def> = FADDDrr %D0<kill>, %D0
 288 ; We'll get a crash if we naively look at the first operand, remove it
 289 ; from the substitution list then look at the second operand.
 290
     ; Anchor on the function label, as f1-f6 do, so the patterns below can only
     ; match inside f7's output and a failure is reported against f7.
     ; CHECK-LABEL: f7:
 291 ; CHECK: fmadd [[x:d[0-9]+]]
 292 ; CHECK: fadd d1, [[x]], [[x]]
 293
 294 define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
 295 entry:                                            ; one multiply/add chain whose result is used twice by the same fadd
 296   %0 = load double* %p, align 8                   ; p[0]
 297   %arrayidx1 = getelementptr inbounds double* %p, i64 1
 298   %1 = load double* %arrayidx1, align 8           ; p[1]
 299   %arrayidx2 = getelementptr inbounds double* %p, i64 2
 300   %2 = load double* %arrayidx2, align 8           ; p[2]
 301   %arrayidx3 = getelementptr inbounds double* %p, i64 3
 302   %3 = load double* %arrayidx3, align 8           ; p[3]
 303   %arrayidx4 = getelementptr inbounds double* %p, i64 4
 304   %4 = load double* %arrayidx4, align 8           ; p[4]
 305   %mul = fmul fast double %0, %1                  ; chain starts: p[0]*p[1]
 306   %add = fadd fast double %mul, %4                ;   + p[4]
 307   %mul5 = fmul fast double %1, %2
 308   %add6 = fadd fast double %mul5, %add            ;   + p[1]*p[2]
 309   %mul7 = fmul fast double %1, %3
 310   %sub = fsub fast double %add6, %mul7            ;   - p[1]*p[3]
 311   %mul8 = fmul fast double %2, %3
 312   %add9 = fadd fast double %mul8, %sub            ;   + p[2]*p[3]
 313   %add10 = fadd fast double %add9, %add9          ; both operands are the chain result (the repeated-operand case)
 314   call void @hhh(double 0.0, double %add10)       ; %add10 is the second argument; %q is never written
 315   ret void
 316 }
 317
 318 declare void @hhh(double, double)                 ; external function; f7 calls it with 0.0 and the doubled chain result
 319
 320 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" } ; used by every function defined in this file (f1-f7)
 321 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" } ; used by the declarations of @g and @hh; same as #0 without nounwind
 322 attributes #2 = { nounwind } ; used on the call sites of @g (f3, f5) and @hh (f6)
 323