test/CodeGen/R600/llvm.AMDGPU.rcp.ll

   1 ; RUN: llc -march=r600 -mcpu=SI -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
   2 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
   3
   4 declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone ; target rcp intrinsic, float variant; called by @rcp_f32 and @rsq_rcp_pat_f32
   5 declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone ; target rcp intrinsic, double variant; called by @rcp_f64 and @rsq_rcp_pat_f64
   6
   7
   8 declare float @llvm.sqrt.f32(float) nounwind readnone ; generic sqrt; its result is the rcp operand in @rsq_rcp_pat_f32
   9 declare double @llvm.sqrt.f64(double) nounwind readnone ; generic sqrt; its result is the rcp operand in @rsq_rcp_pat_f64
  10
  11 ; FUNC-LABEL: @rcp_f32
  12 ; SI: V_RCP_F32_e32
  13 define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind { ; direct use of the float rcp intrinsic
  14   %recip = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone ; apply the intrinsic to the incoming argument
  15   store float %recip, float addrspace(1)* %out, align 4 ; write the result through the addrspace(1) pointer
  16   ret void
  17 }
  18
  19 ; FUNC-LABEL: @rcp_f64
  20 ; SI: V_RCP_F64_e32
  21 define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind { ; direct use of the double rcp intrinsic
  22   %recip = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone ; apply the intrinsic to the incoming argument
  23   store double %recip, double addrspace(1)* %out, align 8 ; write the result through the addrspace(1) pointer
  24   ret void
  25 }
  26
  27 ; FUNC-LABEL: @rcp_pat_f32
  28 ; SI-UNSAFE-NOT: V_MUL_F32
  29 ; SI-UNSAFE: V_RCP_F32_e32
  30 ; SI-UNSAFE-NOT: V_MUL_F32
  31
  32 ; Without unsafe math the divide must stay correct, so the reciprocal is expected to be surrounded by multiplies.
  33 ; SI-SAFE: V_MUL_F32
  34 ; SI-SAFE: V_RCP_F32_e32
  35 ; SI-SAFE: V_MUL_F32
  36
  37 define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { ; reciprocal written as a plain divide, no intrinsic call
  38   %quot = fdiv float 1.000000e+00, %src ; 1.0 / src, the pattern the preceding checks are written against
  39   store float %quot, float addrspace(1)* %out, align 4 ; write the quotient through the addrspace(1) pointer
  40   ret void
  41 }
  42
  43 ; FUNC-LABEL: @rcp_pat_f64
  44 ; SI: V_RCP_F64_e32
  45 define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { ; double reciprocal written as a plain divide, no intrinsic call
  46   %quot = fdiv double 1.000000e+00, %src ; 1.0 / src
  47   store double %quot, double addrspace(1)* %out, align 8 ; write the quotient through the addrspace(1) pointer
  48   ret void
  49 }
  50
  51 ; FUNC-LABEL: @rsq_rcp_pat_f32
  52 ; SI-UNSAFE: V_RSQ_F32_e32
  53 ; SI-SAFE: V_SQRT_F32_e32
  54 ; SI-SAFE: V_RCP_F32_e32
  55 define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { ; rcp intrinsic applied to a sqrt result
  56   %root = call float @llvm.sqrt.f32(float %src) nounwind readnone ; sqrt of the incoming argument
  57   %recip = call float @llvm.AMDGPU.rcp.f32(float %root) nounwind readnone ; rcp intrinsic fed with the sqrt result
  58   store float %recip, float addrspace(1)* %out, align 4 ; write the result through the addrspace(1) pointer
  59   ret void
  60 }
  61
  62 ; FUNC-LABEL: @rsq_rcp_pat_f64
  63 ; SI: V_RSQ_F64_e32
  64 define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { ; double rcp intrinsic applied to a sqrt result
  65   %root = call double @llvm.sqrt.f64(double %src) nounwind readnone ; sqrt of the incoming argument
  66   %recip = call double @llvm.AMDGPU.rcp.f64(double %root) nounwind readnone ; rcp intrinsic fed with the sqrt result
  67   store double %recip, double addrspace(1)* %out, align 8 ; write the result through the addrspace(1) pointer
  68   ret void
  69 }