From: Tom Stellard Date: Sat, 28 Sep 2013 02:50:50 +0000 (+0000) Subject: R600: Fix handling of NAN in comparison instructions X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9c598cfebcc3387676995873e65ae4fed96b3edc;p=oota-llvm.git R600: Fix handling of NAN in comparison instructions We were completely ignoring the unordered/ordered attributes of condition codes and also incorrectly lowering seto and setuo. Reviewed-by: Vincent Lejeune git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191603 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index e30abc08bdb..5778a8c2b23 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -43,12 +43,23 @@ def COND_EQ : PatLeaf < case ISD::SETEQ: return true;}}}] >; +def COND_OEQ : PatLeaf < + (cond), + [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}] +>; + def COND_NE : PatLeaf < (cond), [{switch(N->get()){{default: return false; case ISD::SETONE: case ISD::SETUNE: case ISD::SETNE: return true;}}}] >; + +def COND_UNE : PatLeaf < + (cond), + [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}] +>; + def COND_GT : PatLeaf < (cond), [{switch(N->get()){{default: return false; @@ -56,6 +67,11 @@ def COND_GT : PatLeaf < case ISD::SETGT: return true;}}}] >; +def COND_OGT : PatLeaf < + (cond), + [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}] +>; + def COND_GE : PatLeaf < (cond), [{switch(N->get()){{default: return false; @@ -63,6 +79,11 @@ def COND_GE : PatLeaf < case ISD::SETGE: return true;}}}] >; +def COND_OGE : PatLeaf < + (cond), + [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}] +>; + def COND_LT : PatLeaf < (cond), [{switch(N->get()){{default: return false; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 5e9048a7019..e17dcd4b73f 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ 
b/lib/Target/R600/R600ISelLowering.cpp @@ -38,10 +38,17 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : computeRegisterProperties(); - setCondCodeAction(ISD::SETLE, MVT::f32, Expand); + // Set condition code actions + setCondCodeAction(ISD::SETO, MVT::f32, Expand); + setCondCodeAction(ISD::SETUO, MVT::f32, Expand); setCondCodeAction(ISD::SETLT, MVT::f32, Expand); + setCondCodeAction(ISD::SETLE, MVT::f32, Expand); setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); + setCondCodeAction(ISD::SETONE, MVT::f32, Expand); + setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); + setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); + setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); setCondCodeAction(ISD::SETULT, MVT::f32, Expand); setCondCodeAction(ISD::SETULE, MVT::f32, Expand); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 65ea04bd1e4..e92385dd1d3 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -689,42 +689,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. 
def SETE : R600_2OP < 0x08, "SETE", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))] >; def SGT : R600_2OP < 0x09, "SETGT", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))] >; def SGE : R600_2OP < 0xA, "SETGE", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))] >; def SNE : R600_2OP < 0xB, "SETNE", - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))] >; def SETE_DX10 : R600_2OP < 0xC, "SETE_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))] >; def SETGT_DX10 : R600_2OP < 0xD, "SETGT_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))] >; def SETGE_DX10 : R600_2OP < 0xE, "SETGE_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))] >; def SETNE_DX10 : R600_2OP < 0xF, "SETNE_DX10", - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))] >; def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; @@ -920,19 +920,19 @@ class MULADD_IEEE_Common inst> : R600_3OP < class CNDE_Common inst> : R600_3OP < inst, "CNDE", - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))] + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))] >; class CNDGT_Common inst> : R600_3OP < inst, "CNDGT", - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, 
COND_GT))] + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))] > { let Itinerary = VecALU; } class CNDGE_Common inst> : R600_3OP < inst, "CNDGE", - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))] + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))] > { let Itinerary = VecALU; } @@ -2324,38 +2324,6 @@ def KIL : Pat < (MASK_WRITE (KILLGT (f32 ZERO), $src0)) >; -// The next two patterns are special cases for handling 'true if ordered' and -// 'true if unordered' conditionals. The assumption here is that the behavior of -// SETE and SNE conforms to the Direct3D 10 rules for floating point values -// described here: -// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit -// We assume that SETE returns false when one of the operands is NAN and -// SNE returns true when on of the operands is NAN - -//SETE - 'true if ordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO), - (SETE $src0, $src1) ->; - -//SETE_DX10 - 'true if ordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, -1, 0, SETO), - (SETE_DX10 $src0, $src1) ->; - -//SNE - 'true if unordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO), - (SNE $src0, $src1) ->; - -//SETNE_DX10 - 'true if ordered' -def : Pat < - (selectcc f32:$src0, f32:$src1, -1, 0, SETUO), - (SETNE_DX10 $src0, $src1) ->; - def : Extract_Element ; def : Extract_Element ; def : Extract_Element ; diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll index 8b704e56484..be25c9ce8d8 100644 --- a/test/CodeGen/R600/fmax.ll +++ b/test/CodeGen/R600/fmax.ll @@ -5,7 +5,7 @@ define void @test() { %r0 = call float @llvm.R600.load.input(i32 0) %r1 = call float @llvm.R600.load.input(i32 1) - %r2 = fcmp uge float %r0, %r1 + %r2 = fcmp oge float %r0, %r1 %r3 = select i1 %r2, float %r0, float %r1 call void @llvm.AMDGPU.store.output(float %r3, i32 0) ret void diff --git 
a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll index 8bdb050a126..0baa3cd3e1a 100644 --- a/test/CodeGen/R600/kcache-fold.ll +++ b/test/CodeGen/R600/kcache-fold.ll @@ -10,7 +10,7 @@ main_body: %3 = extractelement <4 x float> %2, i32 0 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %5 = extractelement <4 x float> %4, i32 0 - %6 = fcmp ult float %1, 0.000000e+00 + %6 = fcmp ogt float %1, 0.000000e+00 %7 = select i1 %6, float %3, float %5 %8 = load <4 x float> addrspace(8)* null %9 = extractelement <4 x float> %8, i32 1 @@ -18,7 +18,7 @@ main_body: %11 = extractelement <4 x float> %10, i32 1 %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %13 = extractelement <4 x float> %12, i32 1 - %14 = fcmp ult float %9, 0.000000e+00 + %14 = fcmp ogt float %9, 0.000000e+00 %15 = select i1 %14, float %11, float %13 %16 = load <4 x float> addrspace(8)* null %17 = extractelement <4 x float> %16, i32 2 @@ -26,7 +26,7 @@ main_body: %19 = extractelement <4 x float> %18, i32 2 %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %21 = extractelement <4 x float> %20, i32 2 - %22 = fcmp ult float %17, 0.000000e+00 + %22 = fcmp ogt float %17, 0.000000e+00 %23 = select i1 %22, float %19, float %21 %24 = load <4 x float> addrspace(8)* null %25 = extractelement <4 x float> %24, i32 3 @@ -34,7 +34,7 @@ main_body: %27 = extractelement <4 x float> %26, i32 3 %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %29 = extractelement <4 x float> %28, i32 3 - %30 = fcmp ult float %25, 0.000000e+00 + %30 = fcmp ogt float %25, 0.000000e+00 %31 = select i1 %30, float %27, float %29 %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00) %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) @@ -58,7 
+58,7 @@ main_body: %3 = extractelement <4 x float> %2, i32 0 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %5 = extractelement <4 x float> %4, i32 1 - %6 = fcmp ult float %1, 0.000000e+00 + %6 = fcmp ogt float %1, 0.000000e+00 %7 = select i1 %6, float %3, float %5 %8 = load <4 x float> addrspace(8)* null %9 = extractelement <4 x float> %8, i32 1 @@ -66,7 +66,7 @@ main_body: %11 = extractelement <4 x float> %10, i32 0 %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %13 = extractelement <4 x float> %12, i32 1 - %14 = fcmp ult float %9, 0.000000e+00 + %14 = fcmp ogt float %9, 0.000000e+00 %15 = select i1 %14, float %11, float %13 %16 = load <4 x float> addrspace(8)* null %17 = extractelement <4 x float> %16, i32 2 @@ -74,7 +74,7 @@ main_body: %19 = extractelement <4 x float> %18, i32 3 %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %21 = extractelement <4 x float> %20, i32 2 - %22 = fcmp ult float %17, 0.000000e+00 + %22 = fcmp ogt float %17, 0.000000e+00 %23 = select i1 %22, float %19, float %21 %24 = load <4 x float> addrspace(8)* null %25 = extractelement <4 x float> %24, i32 3 @@ -82,7 +82,7 @@ main_body: %27 = extractelement <4 x float> %26, i32 3 %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %29 = extractelement <4 x float> %28, i32 2 - %30 = fcmp ult float %25, 0.000000e+00 + %30 = fcmp ogt float %25, 0.000000e+00 %31 = select i1 %30, float %27, float %29 %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00) %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll index 6e0b7440838..6d9396cb7d1 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/R600/pv.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s 
-march=r600 | FileCheck %s ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED) -;CHECK: CNDGE T{{[0-9].[XYZW]}}, PV.X +;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X define void @main() #0 { main_body: diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll index 7e2d55950af..834c0306952 100644 --- a/test/CodeGen/R600/selectcc-opt.ll +++ b/test/CodeGen/R600/selectcc-opt.ll @@ -6,7 +6,7 @@ define void @test_a(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ult float %in, 0.000000e+00 + %0 = fcmp olt float %in, 0.000000e+00 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -34,7 +34,7 @@ ENDIF: ; CHECK-NEXT: ALU clause starting define void @test_b(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ult float %in, 0.0 + %0 = fcmp olt float %in, 0.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll index bdc2ff40b76..5c7d4998d07 100644 --- a/test/CodeGen/R600/set-dx10.ll +++ b/test/CodeGen/R600/set-dx10.ll @@ -30,13 +30,13 @@ entry: ret void } -; CHECK: @fcmp_ueq_select_fptosi +; CHECK: @fcmp_oeq_select_fptosi ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) { +define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ueq float %in, 5.0 + %0 = fcmp oeq float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -44,25 +44,25 @@ entry: ret void } -; CHECK: @fcmp_ueq_select_i32 +; CHECK: @fcmp_oeq_select_i32 ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ueq_select_i32(i32 
addrspace(1)* %out, float %in) { +define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ueq float %in, 5.0 + %0 = fcmp oeq float %in, 5.0 %1 = select i1 %0, i32 -1, i32 0 store i32 %1, i32 addrspace(1)* %out ret void } -; CHECK: @fcmp_ugt_select_fptosi +; CHECK: @fcmp_ogt_select_fptosi ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) { +define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ugt float %in, 5.0 + %0 = fcmp ogt float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -70,25 +70,25 @@ entry: ret void } -; CHECK: @fcmp_ugt_select_i32 +; CHECK: @fcmp_ogt_select_i32 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) { +define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ugt float %in, 5.0 + %0 = fcmp ogt float %in, 5.0 %1 = select i1 %0, i32 -1, i32 0 store i32 %1, i32 addrspace(1)* %out ret void } -; CHECK: @fcmp_uge_select_fptosi +; CHECK: @fcmp_oge_select_fptosi ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) { +define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp uge float %in, 5.0 + %0 = fcmp oge float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -96,25 +96,25 @@ entry: ret void } -; CHECK: @fcmp_uge_select_i32 +; CHECK: @fcmp_oge_select_i32 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, 
KC0[2].Z, literal.x, ; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) { +define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp uge float %in, 5.0 + %0 = fcmp oge float %in, 5.0 %1 = select i1 %0, i32 -1, i32 0 store i32 %1, i32 addrspace(1)* %out ret void } -; CHECK: @fcmp_ule_select_fptosi +; CHECK: @fcmp_ole_select_fptosi ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) { +define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ule float %in, 5.0 + %0 = fcmp ole float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -122,25 +122,25 @@ entry: ret void } -; CHECK: @fcmp_ule_select_i32 +; CHECK: @fcmp_ole_select_i32 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) { +define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ule float %in, 5.0 + %0 = fcmp ole float %in, 5.0 %1 = select i1 %0, i32 -1, i32 0 store i32 %1, i32 addrspace(1)* %out ret void } -; CHECK: @fcmp_ult_select_fptosi +; CHECK: @fcmp_olt_select_fptosi ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) { +define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ult float %in, 5.0 + %0 = fcmp olt float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 %2 = fsub float -0.000000e+00, %1 %3 = fptosi float %2 to i32 @@ -148,13 +148,13 @@ entry: ret void 
} -; CHECK: @fcmp_ult_select_i32 +; CHECK: @fcmp_olt_select_i32 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK-NEXT: LSHR ; CHECK-NEXT: 1084227584(5.000000e+00) -define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) { +define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) { entry: - %0 = fcmp ult float %in, 5.0 + %0 = fcmp olt float %in, 5.0 %1 = select i1 %0, i32 -1, i32 0 store i32 %1, i32 addrspace(1)* %out ret void diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll index d3aa060adc0..f986a0251dc 100644 --- a/test/CodeGen/R600/unsupported-cc.ll +++ b/test/CodeGen/R600/unsupported-cc.ll @@ -2,7 +2,7 @@ ; These tests are for condition codes that are not supported by the hardware -; CHECK: @slt +; CHECK-LABEL: @slt ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK-NEXT: LSHR ; CHECK-NEXT: 5(7.006492e-45) @@ -14,7 +14,7 @@ entry: ret void } -; CHECK: @ult_i32 +; CHECK-LABEL: @ult_i32 ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK-NEXT: LSHR ; CHECK-NEXT: 5(7.006492e-45) @@ -26,10 +26,11 @@ entry: ret void } -; CHECK: @ult_float -; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR +; CHECK-LABEL: @ult_float +; CHECK: SETGE * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x ; CHECK-NEXT: 1084227584(5.000000e+00) +; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0 +; CHECK-NEXT: LSHR * define void @ult_float(float addrspace(1)* %out, float %in) { entry: %0 = fcmp ult float %in, 5.0 @@ -38,10 +39,22 @@ entry: ret void } -; CHECK: @olt -; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR -;CHECK-NEXT: 1084227584(5.000000e+00) +; CHECK-LABEL: @ult_float_native +; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR * +; CHECK-NEXT: 1084227584(5.000000e+00) +define void @ult_float_native(float addrspace(1)* %out, float %in) { +entry: 
+ %0 = fcmp ult float %in, 5.0 + %1 = select i1 %0, float 0.0, float 1.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: @olt +; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR * +; CHECK-NEXT: 1084227584(5.000000e+00) define void @olt(float addrspace(1)* %out, float %in) { entry: %0 = fcmp olt float %in, 5.0 @@ -50,7 +63,7 @@ entry: ret void } -; CHECK: @sle +; CHECK-LABEL: @sle ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK-NEXT: LSHR ; CHECK-NEXT: 6(8.407791e-45) @@ -62,7 +75,7 @@ entry: ret void } -; CHECK: @ule_i32 +; CHECK-LABEL: @ule_i32 ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK-NEXT: LSHR ; CHECK-NEXT: 6(8.407791e-45) @@ -74,10 +87,11 @@ entry: ret void } -; CHECK: @ule_float -; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR +; CHECK-LABEL: @ule_float +; CHECK: SETGT * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x ; CHECK-NEXT: 1084227584(5.000000e+00) +; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0 +; CHECK-NEXT: LSHR * define void @ule_float(float addrspace(1)* %out, float %in) { entry: %0 = fcmp ule float %in, 5.0 @@ -86,9 +100,21 @@ entry: ret void } -; CHECK: @ole -; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR +; CHECK-LABEL: @ule_float_native +; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x +; CHECK-NEXT: LSHR * +; CHECK-NEXT: 1084227584(5.000000e+00) +define void @ule_float_native(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ule float %in, 5.0 + %1 = select i1 %0, float 0.0, float 1.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK-LABEL: @ole +; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z +; CHECK-NEXT: LSHR * ; CHECK-NEXT:1084227584(5.000000e+00) define void @ole(float addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll index 
8e9c5b59cd1..ee17e0f5087 100644 --- a/test/CodeGen/R600/vselect.ll +++ b/test/CodeGen/R600/vselect.ll @@ -31,7 +31,7 @@ define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrs entry: %0 = load <2 x float> addrspace(1)* %in0 %1 = load <2 x float> addrspace(1)* %in1 - %cmp = fcmp one <2 x float> %0, %1 + %cmp = fcmp une <2 x float> %0, %1 %result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1 store <2 x float> %result, <2 x float> addrspace(1)* %out ret void @@ -69,7 +69,7 @@ define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrs entry: %0 = load <4 x float> addrspace(1)* %in0 %1 = load <4 x float> addrspace(1)* %in1 - %cmp = fcmp one <4 x float> %0, %1 + %cmp = fcmp une <4 x float> %0, %1 %result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1 store <4 x float> %result, <4 x float> addrspace(1)* %out ret void