test/CodeGen/AArch64/machine-combiner.ll

   1 ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math < %s | FileCheck %s
   2
   3 ; Verify that the first two adds are independent regardless of how the inputs are
   4 ; commuted. The destination registers are used as source registers for the third add.
   5
   6 define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
   7 ; CHECK-LABEL:   reassociate_adds1:
   8 ; CHECK:         fadd  s0, s0, s1
   9 ; CHECK-NEXT:    fadd  s1, s2, s3
  10 ; CHECK-NEXT:    fadd  s0, s0, s1
  11 ; CHECK-NEXT:    ret
  12   %sum01 = fadd float %x0, %x1         ; x0 + x1
  13   %sum012 = fadd float %sum01, %x2     ; running sum is the left operand: (x0 + x1) + x2
  14   %sum0123 = fadd float %sum012, %x3   ; running sum is the left operand again; serial chain of three adds
  15   ret float %sum0123
  16 }
  17
  18 define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
  19 ; CHECK-LABEL:   reassociate_adds2:
  20 ; CHECK:         fadd  s0, s0, s1
  21 ; CHECK-NEXT:    fadd  s1, s2, s3
  22 ; CHECK-NEXT:    fadd  s0, s0, s1
  23 ; CHECK-NEXT:    ret
  24   %sum01 = fadd float %x0, %x1         ; x0 + x1
  25   %sum012 = fadd float %x2, %sum01     ; commuted: running sum is the right operand here
  26   %sum0123 = fadd float %sum012, %x3   ; running sum is the left operand for the last add
  27   ret float %sum0123
  28 }
  29
  30 define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
  31 ; CHECK-LABEL:   reassociate_adds3:
  32 ; CHECK:         fadd  s0, s0, s1
  33 ; CHECK-NEXT:    fadd  s1, s2, s3
  34 ; CHECK-NEXT:    fadd  s0, s0, s1
  35 ; CHECK-NEXT:    ret
  36   %t0 = fadd float %x0, %x1   ; x0 + x1
  37   %t1 = fadd float %t0, %x2   ; running sum is the left operand: (x0 + x1) + x2
  38   %t2 = fadd float %x3, %t1   ; commuted: running sum is the right operand of the last add
  39   ret float %t2
  40 }
  41
  42 define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
  43 ; CHECK-LABEL:   reassociate_adds4:
  44 ; CHECK:         fadd  s0, s0, s1
  45 ; CHECK-NEXT:    fadd  s1, s2, s3
  46 ; CHECK-NEXT:    fadd  s0, s0, s1
  47 ; CHECK-NEXT:    ret
  48   %t0 = fadd float %x0, %x1   ; x0 + x1
  49   %t1 = fadd float %x2, %t0   ; commuted: running sum is the right operand
  50   %t2 = fadd float %x3, %t1   ; commuted again: running sum is the right operand of the last add
  51   ret float %t2
  52 }
  53
  54 ; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
  55 ; produced because that would cost more compile time.
  56
  57 define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
  58 ; CHECK-LABEL:   reassociate_adds5:
  59 ; CHECK:         fadd  s0, s0, s1
  60 ; CHECK-NEXT:    fadd  s1, s2, s3
  61 ; CHECK-NEXT:    fadd  s0, s0, s1
  62 ; CHECK-NEXT:    fadd  s1, s4, s5
  63 ; CHECK-NEXT:    fadd  s1, s1, s6
  64 ; CHECK-NEXT:    fadd  s0, s0, s1
  65 ; CHECK-NEXT:    fadd  s0, s0, s7
  66 ; CHECK-NEXT:    ret
  67   %acc1 = fadd float %x0, %x1     ; start of a seven-add serial chain: x0 + x1
  68   %acc2 = fadd float %acc1, %x2   ; each following add takes the previous result on the left
  69   %acc3 = fadd float %acc2, %x3
  70   %acc4 = fadd float %acc3, %x4
  71   %acc5 = fadd float %acc4, %x5
  72   %acc6 = fadd float %acc5, %x6
  73   %acc7 = fadd float %acc6, %x7   ; final sum of all eight arguments
  74   ret float %acc7
  75 }
  76
  77 ; Verify that we only need two associative operations to reassociate the operands.
  78 ; Also, we should reassociate such that the result of the high latency division
  79 ; is used by the final 'add' rather than reassociating the %x3 operand with the
  80 ; division. The latter reassociation would not improve anything.
  81
  82 define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
  83 ; CHECK-LABEL:   reassociate_adds6:
  84 ; CHECK:         fdiv  s0, s0, s1
  85 ; CHECK-NEXT:    fadd  s1, s2, s3
  86 ; CHECK-NEXT:    fadd  s0, s0, s1
  87 ; CHECK-NEXT:    ret
  88   %quot = fdiv float %x0, %x1        ; division whose result feeds the add chain
  89   %partial = fadd float %x2, %quot   ; x2 + (x0 / x1)
  90   %total = fadd float %x3, %partial  ; x3 + (x2 + (x0 / x1))
  91   ret float %total
  92 }
  93
  94 ; Verify that scalar single-precision multiplies are reassociated.
  95
  96 define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
  97 ; CHECK-LABEL:   reassociate_muls1:
  98 ; CHECK:         fdiv  s0, s0, s1
  99 ; CHECK-NEXT:    fmul  s1, s2, s3
 100 ; CHECK-NEXT:    fmul  s0, s0, s1
 101 ; CHECK-NEXT:    ret
 102   %quot = fdiv float %x0, %x1        ; division whose result feeds the multiply chain
 103   %partial = fmul float %x2, %quot   ; x2 * (x0 / x1)
 104   %prod = fmul float %x3, %partial   ; x3 * (x2 * (x0 / x1))
 105   ret float %prod
 106 }
 107
 108 ; Verify that scalar double-precision adds are reassociated.
 109
 110 define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
 111 ; CHECK-LABEL:   reassociate_adds_double:
 112 ; CHECK:         fdiv  d0, d0, d1
 113 ; CHECK-NEXT:    fadd  d1, d2, d3
 114 ; CHECK-NEXT:    fadd  d0, d0, d1
 115 ; CHECK-NEXT:    ret
 116   %quot = fdiv double %x0, %x1        ; division whose result feeds the add chain
 117   %partial = fadd double %x2, %quot   ; x2 + (x0 / x1)
 118   %total = fadd double %x3, %partial  ; x3 + (x2 + (x0 / x1))
 119   ret double %total
 120 }
 121
 122 ; Verify that scalar double-precision multiplies are reassociated.
 123
 124 define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
 125 ; CHECK-LABEL:   reassociate_muls_double:
 126 ; CHECK:         fdiv  d0, d0, d1
 127 ; CHECK-NEXT:    fmul  d1, d2, d3
 128 ; CHECK-NEXT:    fmul  d0, d0, d1
 129 ; CHECK-NEXT:    ret
 130   %quot = fdiv double %x0, %x1        ; division whose result feeds the multiply chain
 131   %partial = fmul double %x2, %quot   ; x2 * (x0 / x1)
 132   %prod = fmul double %x3, %partial   ; x3 * (x2 * (x0 / x1))
 133   ret double %prod
 134 }
 135
 136 ; Verify that we reassociate vector instructions too.
 137
 138 define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 139 ; CHECK-LABEL:   vector_reassociate_adds1:
 140 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
 141 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
 142 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
 143 ; CHECK-NEXT:    ret
 144   %sum01 = fadd <4 x float> %x0, %x1         ; x0 + x1
 145   %sum012 = fadd <4 x float> %sum01, %x2     ; running sum is the left operand
 146   %sum0123 = fadd <4 x float> %sum012, %x3   ; running sum is the left operand again; serial chain of three adds
 147   ret <4 x float> %sum0123
 148 }
 149
 150 define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 151 ; CHECK-LABEL:   vector_reassociate_adds2:
 152 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
 153 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
 154 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
     ; CHECK-NEXT:    ret
 155   %t0 = fadd <4 x float> %x0, %x1   ; x0 + x1
 156   %t1 = fadd <4 x float> %x2, %t0   ; commuted: running sum is the right operand here
 157   %t2 = fadd <4 x float> %t1, %x3   ; running sum is the left operand for the last add
 158   ret <4 x float> %t2
 159 }
 160
 161 define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 162 ; CHECK-LABEL:   vector_reassociate_adds3:
 163 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
 164 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
 165 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
     ; CHECK-NEXT:    ret
 166   %t0 = fadd <4 x float> %x0, %x1   ; x0 + x1
 167   %t1 = fadd <4 x float> %t0, %x2   ; running sum is the left operand
 168   %t2 = fadd <4 x float> %x3, %t1   ; commuted: running sum is the right operand of the last add
 169   ret <4 x float> %t2
 170 }
 171
 172 define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 173 ; CHECK-LABEL:   vector_reassociate_adds4:
 174 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
 175 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
 176 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
     ; CHECK-NEXT:    ret
 177   %t0 = fadd <4 x float> %x0, %x1   ; x0 + x1
 178   %t1 = fadd <4 x float> %x2, %t0   ; commuted: running sum is the right operand
 179   %t2 = fadd <4 x float> %x3, %t1   ; commuted again: running sum is the right operand of the last add
 180   ret <4 x float> %t2
 181 }
 182 ; Verify that 128-bit vector single-precision multiplies are reassociated.
 183
 184 define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 185 ; CHECK-LABEL:   reassociate_muls_v4f32:
 186 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
 187 ; CHECK-NEXT:    fmul  v1.4s, v2.4s, v3.4s
 188 ; CHECK-NEXT:    fmul  v0.4s, v0.4s, v1.4s
 189 ; CHECK-NEXT:    ret
 190   %sum = fadd <4 x float> %x0, %x1        ; non-multiply producer feeding the multiply chain
 191   %partial = fmul <4 x float> %x2, %sum   ; x2 * (x0 + x1)
 192   %prod = fmul <4 x float> %x3, %partial  ; x3 * (x2 * (x0 + x1))
 193   ret <4 x float> %prod
 194 }
 195
 196 ; Verify that 128-bit vector double-precision multiplies are reassociated.
 197
 198 define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
 199 ; CHECK-LABEL:   reassociate_muls_v2f64:
 200 ; CHECK:         fadd  v0.2d, v0.2d, v1.2d
 201 ; CHECK-NEXT:    fmul  v1.2d, v2.2d, v3.2d
 202 ; CHECK-NEXT:    fmul  v0.2d, v0.2d, v1.2d
 203 ; CHECK-NEXT:    ret
 204   %sum = fadd <2 x double> %x0, %x1        ; non-multiply producer feeding the multiply chain
 205   %partial = fmul <2 x double> %x2, %sum   ; x2 * (x0 + x1)
 206   %prod = fmul <2 x double> %x3, %partial  ; x3 * (x2 * (x0 + x1))
 207   ret <2 x double> %prod
 208 }
 209
 210 ; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
 211 ; Verify that reassociation is not happening needlessly or wrongly.
 212
 213 declare double @bar()  ; external function (no body in this file); the tests below call it to produce their fadd operands
 214
 215 define double @reassociate_adds_from_calls() {
 216 ; CHECK-LABEL: reassociate_adds_from_calls:
 217 ; CHECK:       bl   bar
 218 ; CHECK-NEXT:  mov  v8.16b, v0.16b
 219 ; CHECK-NEXT:  bl   bar
 220 ; CHECK-NEXT:  mov  v9.16b, v0.16b
 221 ; CHECK-NEXT:  bl   bar
 222 ; CHECK-NEXT:  mov  v10.16b, v0.16b
 223 ; CHECK-NEXT:  bl   bar
 224 ; CHECK:       fadd d1, d8, d9
 225 ; CHECK-NEXT:  fadd d0, d10, d0
 226 ; CHECK-NEXT:  fadd d0, d1, d0
 227   %x0 = call double @bar()             ; four call results become the add operands
 228   %x1 = call double @bar()
 229   %x2 = call double @bar()
 230   %x3 = call double @bar()
 231   %sum01 = fadd double %x0, %x1        ; x0 + x1
 232   %sum012 = fadd double %sum01, %x2    ; serial chain: (x0 + x1) + x2
 233   %sum0123 = fadd double %sum012, %x3  ; serial chain: ((x0 + x1) + x2) + x3
 234   ret double %sum0123
 235 }
 236
 237 define double @already_reassociated() {
 238 ; CHECK-LABEL: already_reassociated:
 239 ; CHECK:       bl   bar
 240 ; CHECK-NEXT:  mov  v8.16b, v0.16b
 241 ; CHECK-NEXT:  bl   bar
 242 ; CHECK-NEXT:  mov  v9.16b, v0.16b
 243 ; CHECK-NEXT:  bl   bar
 244 ; CHECK-NEXT:  mov  v10.16b, v0.16b
 245 ; CHECK-NEXT:  bl   bar
 246 ; CHECK:       fadd d1, d8, d9
 247 ; CHECK-NEXT:  fadd d0, d10, d0
 248 ; CHECK-NEXT:  fadd d0, d1, d0
 249   %x0 = call double @bar()         ; four call results become the add operands
 250   %x1 = call double @bar()
 251   %x2 = call double @bar()
 252   %x3 = call double @bar()
 253   %lhs = fadd double %x0, %x1      ; x0 + x1
 254   %rhs = fadd double %x2, %x3      ; x2 + x3, independent of %lhs
 255   %sum = fadd double %lhs, %rhs    ; input is already a balanced tree: (x0 + x1) + (x2 + x3)
 256   ret double %sum
 257 }
 258