test/CodeGen/X86/machine-combiner.ll

   1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s
   2
   3 ; Verify that the first two adds are independent regardless of how the inputs are
   4 ; commuted. The destination registers are used as source registers for the third add.
   5
   6 define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) { ; linear chain ((x0 + x1) + x2) + x3; expected asm adds xmm0+xmm1 and xmm2+xmm3 separately, then combines the two sums
   7 ; CHECK-LABEL: reassociate_adds1:
   8 ; CHECK:       # BB#0:
   9 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  10 ; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  11 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  12 ; CHECK-NEXT:    retq
  13   %t0 = fadd float %x0, %x1 ; head of the chain: x0 + x1
  14   %t1 = fadd float %t0, %x2 ; running sum is the left-hand operand
  15   %t2 = fadd float %t1, %x3 ; running sum is the left-hand operand
  16   ret float %t2 ; result of the last add in the chain
  17 }
  18
  19 define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) { ; same chain of three adds, with the middle add's operands commuted; expected asm is the same three vaddss as the other commutation variants
  20 ; CHECK-LABEL: reassociate_adds2:
  21 ; CHECK:       # BB#0:
  22 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  23 ; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  24 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  25 ; CHECK-NEXT:    retq
  26   %t0 = fadd float %x0, %x1 ; head of the chain: x0 + x1
  27   %t1 = fadd float %x2, %t0 ; commuted: running sum is the right-hand operand
  28   %t2 = fadd float %t1, %x3 ; running sum is the left-hand operand
  29   ret float %t2 ; result of the last add in the chain
  30 }
  31
  32 define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) { ; same chain of three adds, with the final add's operands commuted; expected asm is the same three vaddss as the other commutation variants
  33 ; CHECK-LABEL: reassociate_adds3:
  34 ; CHECK:       # BB#0:
  35 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  36 ; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  37 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  38 ; CHECK-NEXT:    retq
  39   %t0 = fadd float %x0, %x1 ; head of the chain: x0 + x1
  40   %t1 = fadd float %t0, %x2 ; running sum is the left-hand operand
  41   %t2 = fadd float %x3, %t1 ; commuted: running sum is the right-hand operand
  42   ret float %t2 ; result of the last add in the chain
  43 }
  44
  45 define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) { ; same chain of three adds, with both dependent adds commuted; expected asm is the same three vaddss as the other commutation variants
  46 ; CHECK-LABEL: reassociate_adds4:
  47 ; CHECK:       # BB#0:
  48 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  49 ; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  50 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  51 ; CHECK-NEXT:    retq
  52   %t0 = fadd float %x0, %x1 ; head of the chain: x0 + x1
  53   %t1 = fadd float %x2, %t0 ; commuted: running sum is the right-hand operand
  54   %t2 = fadd float %x3, %t1 ; commuted: running sum is the right-hand operand
  55   ret float %t2 ; result of the last add in the chain
  56 }
  57
  58 ; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
  59 ; produced because that would cost more compile time.
  60
  61 define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) { ; linear chain of seven fadds over %x0..%x7; expected asm builds two partial sums (in xmm0 and xmm1), combines them, then adds xmm7 last
  62 ; CHECK-LABEL: reassociate_adds5:
  63 ; CHECK:       # BB#0:
  64 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  65 ; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  66 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  67 ; CHECK-NEXT:    vaddss %xmm5, %xmm4, %xmm1
  68 ; CHECK-NEXT:    vaddss %xmm6, %xmm1, %xmm1
  69 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  70 ; CHECK-NEXT:    vaddss %xmm7, %xmm0, %xmm0
  71 ; CHECK-NEXT:    retq
  72   %t0 = fadd float %x0, %x1 ; head of the chain: x0 + x1
  73   %t1 = fadd float %t0, %x2 ; every later add takes the running sum as its left-hand operand
  74   %t2 = fadd float %t1, %x3 ; running sum + x3
  75   %t3 = fadd float %t2, %x4 ; running sum + x4
  76   %t4 = fadd float %t3, %x5 ; running sum + x5
  77   %t5 = fadd float %t4, %x6 ; running sum + x6
  78   %t6 = fadd float %t5, %x7 ; running sum + x7, the last add in the chain
  79   ret float %t6 ; sum of all eight inputs as written in the IR
  80 }
  81
  82 ; Verify that we only need two associative operations to reassociate the operands.
  83 ; Also, we should reassociate such that the result of the high latency division
  84 ; is used by the final 'add' rather than reassociating the %x3 operand with the
  85 ; division. The latter reassociation would not improve anything.
  86
  87 define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) { ; fdiv result feeds a chain of two fadds; expected asm sums xmm2 and xmm3 into xmm1 and adds that to the quotient in the final vaddss
  88 ; CHECK-LABEL: reassociate_adds6:
  89 ; CHECK:       # BB#0:
  90 ; CHECK-NEXT:    vdivss %xmm1, %xmm0, %xmm0
  91 ; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  92 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  93 ; CHECK-NEXT:    retq
  94   %t0 = fdiv float %x0, %x1 ; quotient x0 / x1, the only non-add operation in the chain
  95   %t1 = fadd float %x2, %t0 ; quotient is the right-hand operand
  96   %t2 = fadd float %x3, %t1 ; running sum is the right-hand operand
  97   ret float %t2 ; result of the last add in the chain
  98 }
  99