test/CodeGen/X86/machine-combiner.ll

   1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
   2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX
   3
   4 ; Verify that the first two adds are independent regardless of how the inputs are
   5 ; commuted. The destination registers are used as source registers for the third add.
   6
   7 define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
     ; Baseline operand order: the IR is the serial chain ((x0 + x1) + x2) + x3 and the
     ; running sum is the first operand of both dependent adds.
     ; Under both check prefixes the second expected add reads only %xmm2 and %xmm3, so it
     ; does not depend on the first add (which writes %xmm0); the third add joins the two.
   8 ; SSE-LABEL: reassociate_adds1:
   9 ; SSE:       # BB#0:
  10 ; SSE-NEXT:    addss %xmm1, %xmm0
  11 ; SSE-NEXT:    addss %xmm3, %xmm2
  12 ; SSE-NEXT:    addss %xmm2, %xmm0
  13 ; SSE-NEXT:    retq
  14 ;
  15 ; AVX-LABEL: reassociate_adds1:
  16 ; AVX:       # BB#0:
  17 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  18 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  19 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  20 ; AVX-NEXT:    retq
  21   %t0 = fadd float %x0, %x1
  22   %t1 = fadd float %t0, %x2 ; running sum is operand 0
  23   %t2 = fadd float %t1, %x3 ; running sum is operand 0
  24   ret float %t2
  25 }
  26
  27 define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
     ; Same four-input add chain, but the middle add is commuted: %t0 is its second operand.
     ; The expected instruction sequence is identical to the one checked for
     ; @reassociate_adds1 (two adds on disjoint registers, then one add joining them).
  28 ; SSE-LABEL: reassociate_adds2:
  29 ; SSE:       # BB#0:
  30 ; SSE-NEXT:    addss %xmm1, %xmm0
  31 ; SSE-NEXT:    addss %xmm3, %xmm2
  32 ; SSE-NEXT:    addss %xmm2, %xmm0
  33 ; SSE-NEXT:    retq
  34 ;
  35 ; AVX-LABEL: reassociate_adds2:
  36 ; AVX:       # BB#0:
  37 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  38 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  39 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  40 ; AVX-NEXT:    retq
  41   %t0 = fadd float %x0, %x1
  42   %t1 = fadd float %x2, %t0 ; commuted: running sum is operand 1
  43   %t2 = fadd float %t1, %x3 ; running sum is operand 0
  44   ret float %t2
  45 }
  46
  47 define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
     ; Same four-input add chain, but the final add is commuted: %t1 is its second operand.
     ; The expected instruction sequence is identical to the one checked for
     ; @reassociate_adds1 (two adds on disjoint registers, then one add joining them).
  48 ; SSE-LABEL: reassociate_adds3:
  49 ; SSE:       # BB#0:
  50 ; SSE-NEXT:    addss %xmm1, %xmm0
  51 ; SSE-NEXT:    addss %xmm3, %xmm2
  52 ; SSE-NEXT:    addss %xmm2, %xmm0
  53 ; SSE-NEXT:    retq
  54 ;
  55 ; AVX-LABEL: reassociate_adds3:
  56 ; AVX:       # BB#0:
  57 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  58 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  59 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  60 ; AVX-NEXT:    retq
  61   %t0 = fadd float %x0, %x1
  62   %t1 = fadd float %t0, %x2 ; running sum is operand 0
  63   %t2 = fadd float %x3, %t1 ; commuted: running sum is operand 1
  64   ret float %t2
  65 }
  66
  67 define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
     ; Same four-input add chain with both dependent adds commuted: the running sum is
     ; the second operand each time.
     ; The expected instruction sequence is identical to the one checked for
     ; @reassociate_adds1 (two adds on disjoint registers, then one add joining them).
  68 ; SSE-LABEL: reassociate_adds4:
  69 ; SSE:       # BB#0:
  70 ; SSE-NEXT:    addss %xmm1, %xmm0
  71 ; SSE-NEXT:    addss %xmm3, %xmm2
  72 ; SSE-NEXT:    addss %xmm2, %xmm0
  73 ; SSE-NEXT:    retq
  74 ;
  75 ; AVX-LABEL: reassociate_adds4:
  76 ; AVX:       # BB#0:
  77 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  78 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  79 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  80 ; AVX-NEXT:    retq
  81   %t0 = fadd float %x0, %x1
  82   %t1 = fadd float %x2, %t0 ; commuted: running sum is operand 1
  83   %t2 = fadd float %x3, %t1 ; commuted: running sum is operand 1
  84   ret float %t2
  85 }
  86
  87 ; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
  88 ; produced because that would cost more compile time.
  89
  90 define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
     ; Seven-add serial chain over eight inputs; every add takes the running sum as its
     ; first operand.
     ; The expected code still contains seven adds. Two partial sums are formed away from
     ; the %xmm0 chain (%xmm2 with %xmm3, and %xmm4 with %xmm5 then %xmm6) and folded into
     ; %xmm0 afterwards; the final add of %xmm7 remains on the %xmm0 chain.
  91 ; SSE-LABEL: reassociate_adds5:
  92 ; SSE:       # BB#0:
  93 ; SSE-NEXT:    addss %xmm1, %xmm0
  94 ; SSE-NEXT:    addss %xmm3, %xmm2
  95 ; SSE-NEXT:    addss %xmm2, %xmm0
  96 ; SSE-NEXT:    addss %xmm5, %xmm4
  97 ; SSE-NEXT:    addss %xmm6, %xmm4
  98 ; SSE-NEXT:    addss %xmm4, %xmm0
  99 ; SSE-NEXT:    addss %xmm7, %xmm0
 100 ; SSE-NEXT:    retq
 101 ;
 102 ; AVX-LABEL: reassociate_adds5:
 103 ; AVX:       # BB#0:
 104 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 105 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 106 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 107 ; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
 108 ; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
 109 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 110 ; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
 111 ; AVX-NEXT:    retq
       ; Each %tN below depends on %t(N-1), so the IR as written is fully serial.
 112   %t0 = fadd float %x0, %x1
 113   %t1 = fadd float %t0, %x2
 114   %t2 = fadd float %t1, %x3
 115   %t3 = fadd float %t2, %x4
 116   %t4 = fadd float %t3, %x5
 117   %t5 = fadd float %t4, %x6
 118   %t6 = fadd float %t5, %x7
 119   ret float %t6
 120 }
 121
 122 ; Verify that we only need two associative operations to reassociate the operands.
 123 ; Also, we should reassociate such that the result of the high latency division
 124 ; is used by the final 'add' rather than reassociating the %x3 operand with the
 125 ; division. The latter reassociation would not improve anything.
 126
 127 define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
     ; A division feeds a chain of two commuted adds: x3 + (x2 + (x0 / x1)).
     ; In the expected code the first add reads only %xmm2 and %xmm3, so it does not wait
     ; on the divide; the divide result in %xmm0 is consumed only by the final add.
 128 ; SSE-LABEL: reassociate_adds6:
 129 ; SSE:       # BB#0:
 130 ; SSE-NEXT:    divss %xmm1, %xmm0
 131 ; SSE-NEXT:    addss %xmm3, %xmm2
 132 ; SSE-NEXT:    addss %xmm2, %xmm0
 133 ; SSE-NEXT:    retq
 134 ;
 135 ; AVX-LABEL: reassociate_adds6:
 136 ; AVX:       # BB#0:
 137 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 138 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 139 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 140 ; AVX-NEXT:    retq
 141   %t0 = fdiv float %x0, %x1 ; root of the dependency chain
 142   %t1 = fadd float %x2, %t0 ; as written, this add needs the quotient
 143   %t2 = fadd float %x3, %t1
 144   ret float %t2
 145 }
 146
 147 ; Verify that SSE and AVX scalar single-precision multiplies are reassociated.
 148
 149 define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
     ; Multiply counterpart of the divide-then-two-adds pattern: x3 * (x2 * (x0 / x1)).
     ; In the expected code the first multiply reads only %xmm2 and %xmm3, so it does not
     ; wait on the divide; the divide result in %xmm0 is consumed only by the final multiply.
 150 ; SSE-LABEL: reassociate_muls1:
 151 ; SSE:       # BB#0:
 152 ; SSE-NEXT:    divss %xmm1, %xmm0
 153 ; SSE-NEXT:    mulss %xmm3, %xmm2
 154 ; SSE-NEXT:    mulss %xmm2, %xmm0
 155 ; SSE-NEXT:    retq
 156 ;
 157 ; AVX-LABEL: reassociate_muls1:
 158 ; AVX:       # BB#0:
 159 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 160 ; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
 161 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 162 ; AVX-NEXT:    retq
 163   %t0 = fdiv float %x0, %x1 ; root of the dependency chain
 164   %t1 = fmul float %x2, %t0 ; as written, this multiply needs the quotient
 165   %t2 = fmul float %x3, %t1
 166   ret float %t2
 167 }