R600/SI: Add pattern for AMDGPUurecip

author Michel Danzer <michel.daenzer@amd.com>

Wed, 10 Apr 2013 17:17:56 +0000 (17:17 +0000)

committer Michel Danzer <michel.daenzer@amd.com>

Wed, 10 Apr 2013 17:17:56 +0000 (17:17 +0000)
author Michel Danzer <michel.daenzer@amd.com>
Wed, 10 Apr 2013 17:17:56 +0000 (17:17 +0000)
committer Michel Danzer <michel.daenzer@amd.com>
Wed, 10 Apr 2013 17:17:56 +0000 (17:17 +0000)
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td

index e740348717c7e19462e225e450e7190cd3776774..fa890c15e6d33e7b4bc82721dd241a9d9dbd5deb 100644 (file)
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -94,6 +94,7 @@ class Constants {
  int TWO_PI = 0x40c90fdb;
  int PI = 0x40490fdb;
  int TWO_PI_INV = 0x3e22f983;
+int FP_UINT_MAX_PLUS_1 = 0x4f800000;   // 1 << 32 in floating point encoding
  }
  def CONST : Constants;
  
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td

index b4c45e18fc10c7b10d34236bc971acb1873b4ff9..8ede6ccc5a2910e702edf02c8391d2788f12af57 100644 (file)
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1923,10 +1923,11 @@ def : COS_PAT <COS_cm>;
  defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
  
  // RECIP_UINT emulation for Cayman
+// The float reciprocal lies in [0,1]; multiplying it by 2^32 (0x4f800000) scales it to the unsigned 32-bit integer range
  def : Pat <
    (AMDGPUurecip R600_Reg32:$src0),
    (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
-                            (MOV_IMM_I32 0x4f800000)))
+                            (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
  >;
  
  
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td

index e2a08fcca97f4d41cf6f3ed4862905d5550204fa..e481ef96fead5d896be6637f35a1b575b4476115 100644 (file)
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -602,8 +602,8 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
  defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
    [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
  >;
-//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
-//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
+defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
  defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
    [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
  >;
@@ -1514,6 +1514,14 @@ def : Pat <
    (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
  >;
  
+// The float reciprocal lies in [0,1]; multiplying it by 2^32 (FP_UINT_MAX_PLUS_1) scales it to the unsigned 32-bit integer range
+def : Pat <
+  (AMDGPUurecip i32:$src0),
+  (V_CVT_U32_F32_e32
+    (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
+                   (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
+>;
+
  /********** ================== **********/
  /**********   VOP3 Patterns    **********/
  /********** ================== **********/
diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll

new file mode 100644 (file)

index 0000000..dad02dd
--- /dev/null
+++ b/test/CodeGen/R600/urecip.ll
@@ -0,0 +1,12 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_RCP_IFLAG_F32_e32
+
+define void @test(i32 %p, i32 %q) {
+   %i = udiv i32 %p, %q
+   %r = bitcast i32 %i to float
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+   ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
author	Michel Danzer <michel.daenzer@amd.com>
	Wed, 10 Apr 2013 17:17:56 +0000 (17:17 +0000)
committer	Michel Danzer <michel.daenzer@amd.com>
	Wed, 10 Apr 2013 17:17:56 +0000 (17:17 +0000)
lib/Target/R600/AMDGPUInstructions.td		patch \| blob \| history
lib/Target/R600/R600Instructions.td		patch \| blob \| history
lib/Target/R600/SIInstructions.td		patch \| blob \| history
test/CodeGen/R600/urecip.ll	[new file with mode: 0644]	patch \| blob