From: Matt Arsenault Date: Tue, 6 Jan 2015 23:00:41 +0000 (+0000) Subject: R600/SI: Pattern match isinf to v_cmp_class instructions X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=42d9f7cf0a06dc7cba9286b33bd60e67a1de79d6;p=oota-llvm.git R600/SI: Pattern match isinf to v_cmp_class instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225307 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9f0d7936785..0690792fb72 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -1400,6 +1400,37 @@ SDValue SITargetLowering::performMin3Max3Combine(SDNode *N, return SDValue(); } +SDValue SITargetLowering::performSetCCCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDLoc SL(N); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + EVT VT = LHS.getValueType(); + + if (VT != MVT::f32 && VT != MVT::f64) + return SDValue(); + + // Match isinf pattern + // (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity)) + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); + if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) { + const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS); + if (!CRHS) + return SDValue(); + + const APFloat &APF = CRHS->getValueAPF(); + if (APF.isInfinity() && !APF.isNegative()) { + unsigned Mask = SIInstrFlags::P_INFINITY | SIInstrFlags::N_INFINITY; + return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, + LHS.getOperand(0), DAG.getConstant(Mask, MVT::i32)); + } + } + + return SDValue(); +} + SDValue SITargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -1408,6 +1439,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); + case ISD::SETCC: + return performSetCCCombine(N, DCI); case ISD::FMAXNUM: // TODO: What
about fmax_legacy? case ISD::FMINNUM: case AMDGPUISD::SMAX: diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 4da86280711..44b25dcd844 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -62,6 +62,7 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const; public: SITargetLowering(TargetMachine &tm); diff --git a/test/CodeGen/R600/fp-classify.ll b/test/CodeGen/R600/fp-classify.ll new file mode 100644 index 00000000000..e6ca5efd767 --- /dev/null +++ b/test/CodeGen/R600/fp-classify.ll @@ -0,0 +1,45 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare i1 @llvm.AMDGPU.class.f32(float, i32) #1 +declare i1 @llvm.AMDGPU.class.f64(double, i32) #1 +declare i32 @llvm.r600.read.tidig.x() #1 +declare float @llvm.fabs.f32(float) #1 +declare double @llvm.fabs.f64(double) #1 + +; SI-LABEL: {{^}}test_isinf_pattern: +; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x204{{$}} +; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]] +; SI-NOT: v_cmp +; SI: s_endpgm +define void @test_isinf_pattern(i32 addrspace(1)* nocapture %out, float %x) #0 { + %fabs = tail call float @llvm.fabs.f32(float %x) #1 + %cmp = fcmp oeq float %fabs, 0x7FF0000000000000 + %ext = zext i1 %cmp to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_not_isinf_pattern_0: +; SI-NOT: v_cmp_class +; SI: s_endpgm +define void @test_not_isinf_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 { + %fabs = tail call float @llvm.fabs.f32(float %x) #1 + %cmp = fcmp ueq float %fabs, 0x7FF0000000000000 + %ext = zext i1 %cmp to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_not_isinf_pattern_1: +; SI-NOT: 
v_cmp_class +; SI: s_endpgm +define void @test_not_isinf_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 { + %fabs = tail call float @llvm.fabs.f32(float %x) #1 + %cmp = fcmp oeq float %fabs, 0xFFF0000000000000 + %ext = zext i1 %cmp to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone }