test/CodeGen/X86/recip-fastmath.ll

   1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
   2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
   3
   4 ; If the target's divss/divps instructions are substantially
   5 ; slower than rcpss/rcpps with a Newton-Raphson refinement,
   6 ; we should generate the estimate sequence.
   7
   8 ; See PR21385 ( http://llvm.org/bugs/show_bug.cgi?id=21385 )
   9 ; for details about the accuracy, speed, and implementation
  10 ; differences of x86 reciprocal estimates.
  11
   12 define float @reciprocal_estimate(float %x) #0 {
        ; Scalar case: computes 1.0 / %x with a 'fast' fdiv.
        ; The -mcpu=core2 run (default check prefix) expects a real divss,
        ; i.e. no reciprocal estimate is used there.
        ; The -mcpu=btver2 run expects vrcpss followed by a mul/sub/mul/add
        ; chain, which looks like the estimate-plus-Newton-Raphson sequence
        ; mentioned in the comment at the top of this file.
        ; All checks after the first one in each group are next-line checks,
        ; so no other instruction may appear in between.
   13   %div = fdiv fast float 1.0, %x
   14   ret float %div
  15
  16 ; CHECK-LABEL: reciprocal_estimate:
  17 ; CHECK: movss
  18 ; CHECK-NEXT: divss
  19 ; CHECK-NEXT: movaps
  20 ; CHECK-NEXT: retq
  21
  22 ; BTVER2-LABEL: reciprocal_estimate:
  23 ; BTVER2: vrcpss
  24 ; BTVER2-NEXT: vmulss
  25 ; BTVER2-NEXT: vsubss
  26 ; BTVER2-NEXT: vmulss
  27 ; BTVER2-NEXT: vaddss
  28 ; BTVER2-NEXT: retq
  29 }
  30
   31 define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
        ; 4-wide vector case: divides a splat of 1.0 by %x with a 'fast' fdiv.
        ; The -mcpu=core2 run (default check prefix) expects a single divps.
        ; The -mcpu=btver2 run expects vrcpps followed by a mul/sub/mul/add
        ; chain, the packed counterpart of the scalar estimate sequence
        ; checked in @reciprocal_estimate.
   32   %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
   33   ret <4 x float> %div
  34
  35 ; CHECK-LABEL: reciprocal_estimate_v4f32:
  36 ; CHECK: movaps
  37 ; CHECK-NEXT: divps
  38 ; CHECK-NEXT: movaps
  39 ; CHECK-NEXT: retq
  40
  41 ; BTVER2-LABEL: reciprocal_estimate_v4f32:
  42 ; BTVER2: vrcpps
  43 ; BTVER2-NEXT: vmulps
  44 ; BTVER2-NEXT: vsubps
  45 ; BTVER2-NEXT: vmulps
  46 ; BTVER2-NEXT: vaddps
  47 ; BTVER2-NEXT: retq
  48 }
  49
   50 define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
        ; 8-wide vector case: divides a splat of 1.0 by %x with a 'fast' fdiv.
        ; The -mcpu=core2 run (default check prefix) expects two divps
        ; instructions surrounded by movaps pairs. The first two movaps
        ; checks are plain (not next-line) checks, so only their order is
        ; enforced; everything from the first divps onward must be on
        ; consecutive output lines.
        ; The -mcpu=btver2 run expects one vrcpps plus a single
        ; mul/sub/mul/add chain.
        ; NOTE(review): presumably the core2 output has two divps because
        ; the 8 x float value is handled as two 4 x float halves there,
        ; while btver2 handles it as one wide operation - confirm.
   51   %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
   52   ret <8 x float> %div
  53
  54 ; CHECK-LABEL: reciprocal_estimate_v8f32:
  55 ; CHECK: movaps
  56 ; CHECK: movaps
  57 ; CHECK-NEXT: divps
  58 ; CHECK-NEXT: divps
  59 ; CHECK-NEXT: movaps
  60 ; CHECK-NEXT: movaps
  61 ; CHECK-NEXT: retq
  62
  63 ; BTVER2-LABEL: reciprocal_estimate_v8f32:
  64 ; BTVER2: vrcpps
  65 ; BTVER2-NEXT: vmulps
  66 ; BTVER2-NEXT: vsubps
  67 ; BTVER2-NEXT: vmulps
  68 ; BTVER2-NEXT: vaddps
  69 ; BTVER2-NEXT: retq
  70 }
  71
   72 attributes #0 = { "unsafe-fp-math"="true" } ; attribute group referenced as #0 by all three test functions in this file