test/CodeGen/X86/machine-combiner.ll

   1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
   2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX
   3
   4 ; Verify that the first two adds are independent regardless of how the inputs are
   5 ; commuted. The destination registers are used as source registers for the third add.
   6
   7 define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) { ; serial 3-add chain; running sum is the left operand of every add
   8 ; SSE-LABEL: reassociate_adds1:
   9 ; SSE:       # BB#0:
  10 ; SSE-NEXT:    addss %xmm1, %xmm0
  11 ; SSE-NEXT:    addss %xmm3, %xmm2
  12 ; SSE-NEXT:    addss %xmm2, %xmm0
  13 ; SSE-NEXT:    retq
  14 ;
  15 ; AVX-LABEL: reassociate_adds1:
  16 ; AVX:       # BB#0:
  17 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  18 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  19 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  20 ; AVX-NEXT:    retq
  21   %t0 = fadd float %x0, %x1  ; t0 = x0 + x1
  22   %t1 = fadd float %t0, %x2  ; t1 = t0 + x2 (previous sum on the left)
  23   %t2 = fadd float %t1, %x3  ; t2 = t1 + x3 (previous sum on the left)
  24   ret float %t2
  25 }
  26
  27 define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) { ; same chain as adds1 with the second add's operands commuted
  28 ; SSE-LABEL: reassociate_adds2:
  29 ; SSE:       # BB#0:
  30 ; SSE-NEXT:    addss %xmm1, %xmm0
  31 ; SSE-NEXT:    addss %xmm3, %xmm2
  32 ; SSE-NEXT:    addss %xmm2, %xmm0
  33 ; SSE-NEXT:    retq
  34 ;
  35 ; AVX-LABEL: reassociate_adds2:
  36 ; AVX:       # BB#0:
  37 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  38 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  39 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  40 ; AVX-NEXT:    retq
  41   %t0 = fadd float %x0, %x1  ; t0 = x0 + x1
  42   %t1 = fadd float %x2, %t0  ; t1 = x2 + t0 (previous sum on the right)
  43   %t2 = fadd float %t1, %x3  ; t2 = t1 + x3 (previous sum on the left)
  44   ret float %t2
  45 }
  46
  47 define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) { ; same chain as adds1 with the third add's operands commuted
  48 ; SSE-LABEL: reassociate_adds3:
  49 ; SSE:       # BB#0:
  50 ; SSE-NEXT:    addss %xmm1, %xmm0
  51 ; SSE-NEXT:    addss %xmm3, %xmm2
  52 ; SSE-NEXT:    addss %xmm2, %xmm0
  53 ; SSE-NEXT:    retq
  54 ;
  55 ; AVX-LABEL: reassociate_adds3:
  56 ; AVX:       # BB#0:
  57 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  58 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  59 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  60 ; AVX-NEXT:    retq
  61   %t0 = fadd float %x0, %x1  ; t0 = x0 + x1
  62   %t1 = fadd float %t0, %x2  ; t1 = t0 + x2 (previous sum on the left)
  63   %t2 = fadd float %x3, %t1  ; t2 = x3 + t1 (previous sum on the right)
  64   ret float %t2
  65 }
  66
  67 define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) { ; same chain as adds1 with both the second and third adds commuted
  68 ; SSE-LABEL: reassociate_adds4:
  69 ; SSE:       # BB#0:
  70 ; SSE-NEXT:    addss %xmm1, %xmm0
  71 ; SSE-NEXT:    addss %xmm3, %xmm2
  72 ; SSE-NEXT:    addss %xmm2, %xmm0
  73 ; SSE-NEXT:    retq
  74 ;
  75 ; AVX-LABEL: reassociate_adds4:
  76 ; AVX:       # BB#0:
  77 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  78 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  79 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  80 ; AVX-NEXT:    retq
  81   %t0 = fadd float %x0, %x1  ; t0 = x0 + x1
  82   %t1 = fadd float %x2, %t0  ; t1 = x2 + t0 (previous sum on the right)
  83   %t2 = fadd float %x3, %t1  ; t2 = x3 + t1 (previous sum on the right)
  84   ret float %t2
  85 }
  86
  87 ; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
  88 ; produced because that would cost more compile time.
  89
  90 define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) { ; serial chain of seven adds over eight inputs
  91 ; SSE-LABEL: reassociate_adds5:
  92 ; SSE:       # BB#0:
  93 ; SSE-NEXT:    addss %xmm1, %xmm0
  94 ; SSE-NEXT:    addss %xmm3, %xmm2
  95 ; SSE-NEXT:    addss %xmm2, %xmm0
  96 ; SSE-NEXT:    addss %xmm5, %xmm4
  97 ; SSE-NEXT:    addss %xmm6, %xmm4
  98 ; SSE-NEXT:    addss %xmm4, %xmm0
  99 ; SSE-NEXT:    addss %xmm7, %xmm0
 100 ; SSE-NEXT:    retq
 101 ;
 102 ; AVX-LABEL: reassociate_adds5:
 103 ; AVX:       # BB#0:
 104 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 105 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 106 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 107 ; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
 108 ; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
 109 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 110 ; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
 111 ; AVX-NEXT:    retq
 112   %t0 = fadd float %x0, %x1  ; t0 = x0 + x1
 113   %t1 = fadd float %t0, %x2  ; each following add takes the previous sum as its left operand
 114   %t2 = fadd float %t1, %x3  ; t2 = t1 + x3
 115   %t3 = fadd float %t2, %x4  ; t3 = t2 + x4
 116   %t4 = fadd float %t3, %x5  ; t4 = t3 + x5
 117   %t5 = fadd float %t4, %x6  ; t5 = t4 + x6
 118   %t6 = fadd float %t5, %x7  ; t6 = t5 + x7, the returned total
 119   ret float %t6
 120 }
 121
 122 ; Verify that we only need two associative operations to reassociate the operands.
 123 ; Also, we should reassociate such that the result of the high latency division
 124 ; is used by the final 'add' rather than reassociating the %x3 operand with the
 125 ; division. The latter reassociation would not improve anything.
 126
 127 define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) { ; a divide feeding a chain of two adds
 128 ; SSE-LABEL: reassociate_adds6:
 129 ; SSE:       # BB#0:
 130 ; SSE-NEXT:    divss %xmm1, %xmm0
 131 ; SSE-NEXT:    addss %xmm3, %xmm2
 132 ; SSE-NEXT:    addss %xmm2, %xmm0
 133 ; SSE-NEXT:    retq
 134 ;
 135 ; AVX-LABEL: reassociate_adds6:
 136 ; AVX:       # BB#0:
 137 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 138 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 139 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 140 ; AVX-NEXT:    retq
 141   %t0 = fdiv float %x0, %x1  ; t0 = x0 / x1, the non-add producer at the head of the chain
 142   %t1 = fadd float %x2, %t0  ; t1 = x2 + t0 (divide result on the right)
 143   %t2 = fadd float %x3, %t1  ; t2 = x3 + t1 (previous sum on the right)
 144   ret float %t2
 145 }
 146
 147 ; Verify that SSE and AVX scalar single-precision multiplies are reassociated.
 148
 149 define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) { ; a divide feeding a chain of two scalar float multiplies
 150 ; SSE-LABEL: reassociate_muls1:
 151 ; SSE:       # BB#0:
 152 ; SSE-NEXT:    divss %xmm1, %xmm0
 153 ; SSE-NEXT:    mulss %xmm3, %xmm2
 154 ; SSE-NEXT:    mulss %xmm2, %xmm0
 155 ; SSE-NEXT:    retq
 156 ;
 157 ; AVX-LABEL: reassociate_muls1:
 158 ; AVX:       # BB#0:
 159 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 160 ; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
 161 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 162 ; AVX-NEXT:    retq
 163   %t0 = fdiv float %x0, %x1  ; t0 = x0 / x1
 164   %t1 = fmul float %x2, %t0  ; t1 = x2 * t0 (divide result on the right)
 165   %t2 = fmul float %x3, %t1  ; t2 = x3 * t1 (previous product on the right)
 166   ret float %t2
 167 }
 168
 169 ; Verify that SSE and AVX scalar double-precision adds are reassociated.
 170
 171 define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) { ; a divide feeding a chain of two scalar double adds
 172 ; SSE-LABEL: reassociate_adds_double:
 173 ; SSE:       # BB#0:
 174 ; SSE-NEXT:    divsd %xmm1, %xmm0
 175 ; SSE-NEXT:    addsd %xmm3, %xmm2
 176 ; SSE-NEXT:    addsd %xmm2, %xmm0
 177 ; SSE-NEXT:    retq
 178 ;
 179 ; AVX-LABEL: reassociate_adds_double:
 180 ; AVX:       # BB#0:
 181 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 182 ; AVX-NEXT:    vaddsd %xmm3, %xmm2, %xmm1
 183 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 184 ; AVX-NEXT:    retq
 185   %t0 = fdiv double %x0, %x1  ; t0 = x0 / x1
 186   %t1 = fadd double %x2, %t0  ; t1 = x2 + t0 (divide result on the right)
 187   %t2 = fadd double %x3, %t1  ; t2 = x3 + t1 (previous sum on the right)
 188   ret double %t2
 189 }
 190
 191 ; Verify that SSE and AVX scalar double-precision multiplies are reassociated.
 192
 193 define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) { ; a divide feeding a chain of two scalar double multiplies
 194 ; SSE-LABEL: reassociate_muls_double:
 195 ; SSE:       # BB#0:
 196 ; SSE-NEXT:    divsd %xmm1, %xmm0
 197 ; SSE-NEXT:    mulsd %xmm3, %xmm2
 198 ; SSE-NEXT:    mulsd %xmm2, %xmm0
 199 ; SSE-NEXT:    retq
 200 ;
 201 ; AVX-LABEL: reassociate_muls_double:
 202 ; AVX:       # BB#0:
 203 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 204 ; AVX-NEXT:    vmulsd %xmm3, %xmm2, %xmm1
 205 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 206 ; AVX-NEXT:    retq
 207   %t0 = fdiv double %x0, %x1  ; t0 = x0 / x1
 208   %t1 = fmul double %x2, %t0  ; t1 = x2 * t0 (divide result on the right)
 209   %t2 = fmul double %x3, %t1  ; t2 = x3 * t1 (previous product on the right)
 210   ret double %t2
 211 }
 212
 213 ; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.
 214
 215 define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { ; a multiply feeding a chain of two <4 x float> adds
 216 ; SSE-LABEL: reassociate_adds_v4f32:
 217 ; SSE:       # BB#0:
 218 ; SSE-NEXT:    mulps %xmm1, %xmm0
 219 ; SSE-NEXT:    addps %xmm3, %xmm2
 220 ; SSE-NEXT:    addps %xmm2, %xmm0
 221 ; SSE-NEXT:    retq
 222 ;
 223 ; AVX-LABEL: reassociate_adds_v4f32:
 224 ; AVX:       # BB#0:
 225 ; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
 226 ; AVX-NEXT:    vaddps %xmm3, %xmm2, %xmm1
 227 ; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 228 ; AVX-NEXT:    retq
 229   %t0 = fmul <4 x float> %x0, %x1  ; t0 = x0 * x1, a different opcode from the add chain it feeds
 230   %t1 = fadd <4 x float> %x2, %t0  ; t1 = x2 + t0 (product on the right)
 231   %t2 = fadd <4 x float> %x3, %t1  ; t2 = x3 + t1 (previous sum on the right)
 232   ret <4 x float> %t2
 233 }
 234
 235 ; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.
 236
 237 define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) { ; a multiply feeding a chain of two <2 x double> adds
 238 ; SSE-LABEL: reassociate_adds_v2f64:
 239 ; SSE:       # BB#0:
 240 ; SSE-NEXT:    mulpd %xmm1, %xmm0
 241 ; SSE-NEXT:    addpd %xmm3, %xmm2
 242 ; SSE-NEXT:    addpd %xmm2, %xmm0
 243 ; SSE-NEXT:    retq
 244 ;
 245 ; AVX-LABEL: reassociate_adds_v2f64:
 246 ; AVX:       # BB#0:
 247 ; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
 248 ; AVX-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
 249 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 250 ; AVX-NEXT:    retq
 251   %t0 = fmul <2 x double> %x0, %x1  ; t0 = x0 * x1
 252   %t1 = fadd <2 x double> %x2, %t0  ; t1 = x2 + t0 (product on the right)
 253   %t2 = fadd <2 x double> %x3, %t1  ; t2 = x3 + t1 (previous sum on the right)
 254   ret <2 x double> %t2
 255 }
 256
 257 ; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.
 258
 259 define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { ; an add feeding a chain of two <4 x float> multiplies
 260 ; SSE-LABEL: reassociate_muls_v4f32:
 261 ; SSE:       # BB#0:
 262 ; SSE-NEXT:    addps %xmm1, %xmm0
 263 ; SSE-NEXT:    mulps %xmm3, %xmm2
 264 ; SSE-NEXT:    mulps %xmm2, %xmm0
 265 ; SSE-NEXT:    retq
 266 ;
 267 ; AVX-LABEL: reassociate_muls_v4f32:
 268 ; AVX:       # BB#0:
 269 ; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 270 ; AVX-NEXT:    vmulps %xmm3, %xmm2, %xmm1
 271 ; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
 272 ; AVX-NEXT:    retq
 273   %t0 = fadd <4 x float> %x0, %x1  ; t0 = x0 + x1, a different opcode from the multiply chain it feeds
 274   %t1 = fmul <4 x float> %x2, %t0  ; t1 = x2 * t0 (sum on the right)
 275   %t2 = fmul <4 x float> %x3, %t1  ; t2 = x3 * t1 (previous product on the right)
 276   ret <4 x float> %t2
 277 }
 278
 279 ; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.
 280
 281 define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) { ; an add feeding a chain of two <2 x double> multiplies
 282 ; SSE-LABEL: reassociate_muls_v2f64:
 283 ; SSE:       # BB#0:
 284 ; SSE-NEXT:    addpd %xmm1, %xmm0
 285 ; SSE-NEXT:    mulpd %xmm3, %xmm2
 286 ; SSE-NEXT:    mulpd %xmm2, %xmm0
 287 ; SSE-NEXT:    retq
 288 ;
 289 ; AVX-LABEL: reassociate_muls_v2f64:
 290 ; AVX:       # BB#0:
 291 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 292 ; AVX-NEXT:    vmulpd %xmm3, %xmm2, %xmm1
 293 ; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
 294 ; AVX-NEXT:    retq
 295   %t0 = fadd <2 x double> %x0, %x1  ; t0 = x0 + x1
 296   %t1 = fmul <2 x double> %x2, %t0  ; t1 = x2 * t0 (sum on the right)
 297   %t2 = fmul <2 x double> %x3, %t1  ; t2 = x3 * t1 (previous product on the right)
 298   ret <2 x double> %t2
 299 }
 300
 301 ; Verify that AVX 256-bit vector single-precision adds are reassociated.
 302
 303 define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) { ; a multiply feeding a chain of two <8 x float> adds; only the AVX prefix is checked
 304 ; AVX-LABEL: reassociate_adds_v8f32:
 305 ; AVX:       # BB#0:
 306 ; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
 307 ; AVX-NEXT:    vaddps %ymm3, %ymm2, %ymm1
 308 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 309 ; AVX-NEXT:    retq
 310   %t0 = fmul <8 x float> %x0, %x1  ; t0 = x0 * x1
 311   %t1 = fadd <8 x float> %x2, %t0  ; t1 = x2 + t0 (product on the right)
 312   %t2 = fadd <8 x float> %x3, %t1  ; t2 = x3 + t1 (previous sum on the right)
 313   ret <8 x float> %t2
 314 }
 315
 316 ; Verify that AVX 256-bit vector double-precision adds are reassociated.
 317
 318 define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) { ; a multiply feeding a chain of two <4 x double> adds; only the AVX prefix is checked
 319 ; AVX-LABEL: reassociate_adds_v4f64:
 320 ; AVX:       # BB#0:
 321 ; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
 322 ; AVX-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
 323 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 324 ; AVX-NEXT:    retq
 325   %t0 = fmul <4 x double> %x0, %x1  ; t0 = x0 * x1
 326   %t1 = fadd <4 x double> %x2, %t0  ; t1 = x2 + t0 (product on the right)
 327   %t2 = fadd <4 x double> %x3, %t1  ; t2 = x3 + t1 (previous sum on the right)
 328   ret <4 x double> %t2
 329 }
 330
 331 ; Verify that AVX 256-bit vector single-precision multiplies are reassociated.
 332
 333 define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) { ; an add feeding a chain of two <8 x float> multiplies; only the AVX prefix is checked
 334 ; AVX-LABEL: reassociate_muls_v8f32:
 335 ; AVX:       # BB#0:
 336 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 337 ; AVX-NEXT:    vmulps %ymm3, %ymm2, %ymm1
 338 ; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
 339 ; AVX-NEXT:    retq
 340   %t0 = fadd <8 x float> %x0, %x1  ; t0 = x0 + x1
 341   %t1 = fmul <8 x float> %x2, %t0  ; t1 = x2 * t0 (sum on the right)
 342   %t2 = fmul <8 x float> %x3, %t1  ; t2 = x3 * t1 (previous product on the right)
 343   ret <8 x float> %t2
 344 }
 345
 346 ; Verify that AVX 256-bit vector double-precision multiplies are reassociated.
 347
 348 define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) { ; an add feeding a chain of two <4 x double> multiplies; only the AVX prefix is checked
 349 ; AVX-LABEL: reassociate_muls_v4f64:
 350 ; AVX:       # BB#0:
 351 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 352 ; AVX-NEXT:    vmulpd %ymm3, %ymm2, %ymm1
 353 ; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
 354 ; AVX-NEXT:    retq
 355   %t0 = fadd <4 x double> %x0, %x1  ; t0 = x0 + x1
 356   %t1 = fmul <4 x double> %x2, %t0  ; t1 = x2 * t0 (sum on the right)
 357   %t2 = fmul <4 x double> %x3, %t1  ; t2 = x3 * t1 (previous product on the right)
 358   ret <4 x double> %t2
 359 }
 360
 361 ; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
 362
 363 define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) { ; a divide feeding two chained compare+select minimum patterns
 364 ; SSE-LABEL: reassociate_mins_single:
 365 ; SSE:       # BB#0:
 366 ; SSE-NEXT:    divss %xmm1, %xmm0
 367 ; SSE-NEXT:    minss %xmm3, %xmm2
 368 ; SSE-NEXT:    minss %xmm2, %xmm0
 369 ; SSE-NEXT:    retq
 370 ;
 371 ; AVX-LABEL: reassociate_mins_single:
 372 ; AVX:       # BB#0:
 373 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 374 ; AVX-NEXT:    vminss %xmm3, %xmm2, %xmm1
 375 ; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
 376 ; AVX-NEXT:    retq
 377   %t0 = fdiv float %x0, %x1  ; t0 = x0 / x1
 378   %cmp1 = fcmp olt float %x2, %t0  ; ordered less-than: x2 < t0
 379   %sel1 = select i1 %cmp1, float %x2, float %t0  ; sel1 = x2 when the compare is true, otherwise t0
 380   %cmp2 = fcmp olt float %x3, %sel1  ; ordered less-than: x3 < sel1
 381   %sel2 = select i1 %cmp2, float %x3, float %sel1  ; sel2 = x3 when the compare is true, otherwise sel1
 382   ret float %sel2
 383 }
 384
 385 ; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.
 386
 387 define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) { ; a divide feeding two chained compare+select maximum patterns
 388 ; SSE-LABEL: reassociate_maxs_single:
 389 ; SSE:       # BB#0:
 390 ; SSE-NEXT:    divss %xmm1, %xmm0
 391 ; SSE-NEXT:    maxss %xmm3, %xmm2
 392 ; SSE-NEXT:    maxss %xmm2, %xmm0
 393 ; SSE-NEXT:    retq
 394 ;
 395 ; AVX-LABEL: reassociate_maxs_single:
 396 ; AVX:       # BB#0:
 397 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 398 ; AVX-NEXT:    vmaxss %xmm3, %xmm2, %xmm1
 399 ; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
 400 ; AVX-NEXT:    retq
 401   %t0 = fdiv float %x0, %x1  ; t0 = x0 / x1
 402   %cmp1 = fcmp ogt float %x2, %t0  ; ordered greater-than: x2 > t0
 403   %sel1 = select i1 %cmp1, float %x2, float %t0  ; sel1 = x2 when the compare is true, otherwise t0
 404   %cmp2 = fcmp ogt float %x3, %sel1  ; ordered greater-than: x3 > sel1
 405   %sel2 = select i1 %cmp2, float %x3, float %sel1  ; sel2 = x3 when the compare is true, otherwise sel1
 406   ret float %sel2
 407 }
 408