def hasLDU : Predicate<"Subtarget.hasLDU()">;
def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
-def doF32FTZ : Predicate<"UseF32FTZ">;
+// Complementary predicates on UseF32FTZ: doF32FTZ holds when it equals 1,
+// doNoF32FTZ holds when it equals 0.
+def doF32FTZ : Predicate<"UseF32FTZ==1">;
+def doNoF32FTZ : Predicate<"UseF32FTZ==0">;
def doFMAF32 : Predicate<"doFMAF32">;
def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
(fdiv Float32Regs:$a, fpimm:$b))]>,
Requires<[reqPTX20]>;
+//
+// F32 rsqrt
+//
+
+// Emits "rsqrt.approx.f32 $dst, $b". The instruction's own pattern list is
+// empty, so it is only selected through the explicit Pat that follows.
+def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
+ "rsqrt.approx.f32 \t$dst, $b;", []>;
+
+// Select RSQRTF32approx1r for FloatConst1 / int_nvvm_sqrt_f(b), i.e. a division
+// whose numerator is FloatConst1 (presumably the constant 1.0f; it is defined
+// elsewhere, confirm) and whose denominator is the nvvm sqrt intrinsic.
+// The rewrite applies only when do_DIVF32_FULL, do_SQRTF32_APPROX and
+// doNoF32FTZ all hold; no flush-to-zero variant is provided here.
+def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
+ (RSQRTF32approx1r Float32Regs:$b)>,
+ Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
def rrr : NVPTXInst<(outs Float32Regs:$dst),
--- /dev/null
+; Regression test: 1.0 / llvm.nvvm.sqrt.f(x) should be emitted as a single
+; approximate reciprocal-square-root instruction.
+; The run line below passes -nvptx-prec-divf32=1 and -nvptx-prec-sqrtf32=0;
+; NOTE(review): presumably these are the settings under which the fold is
+; enabled, confirm against the backend option handling.
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=1 -nvptx-prec-sqrtf32=0 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+declare float @llvm.nvvm.sqrt.f(float)
+
+; Returns 1.0 divided by the nvvm sqrt intrinsic applied to %a.
+define float @foo(float %a) {
+; CHECK: rsqrt.approx.f32
+ %val = tail call float @llvm.nvvm.sqrt.f(float %a)
+ %ret = fdiv float 1.0, %val
+ ret float %ret
+}
+
\ No newline at end of file