From: Matt Arsenault Date: Tue, 10 Jun 2014 19:00:20 +0000 (+0000) Subject: R600: Handle fcopysign X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=bfd00e21b76fcbc51e005747b030af7eee7b10c5;p=oota-llvm.git R600: Handle fcopysign git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210564 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 849f16919c0..cb4f8cde1bf 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -214,6 +214,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::UDIVREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); + if (!Subtarget->hasBFI()) { + // fcopysign can be done in a single instruction with BFI. + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + } + static const MVT::SimpleValueType IntTypes[] = { MVT::v2i32, MVT::v4i32 }; @@ -260,6 +266,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::FCOPYSIGN, VT, Expand); } setTargetDAGCombine(ISD::MUL); diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 7d19c3db85b..1345f79646d 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -365,7 +365,7 @@ class DwordAddrPat : Pat < // BFI_INT patterns -multiclass BFIPatterns { +multiclass BFIPatterns { // Definition from ISA doc: // (y & x) | (z & ~x) @@ -381,6 +381,19 @@ multiclass BFIPatterns { (BFI_INT $x, $y, $z) >; + def : Pat < + (fcopysign f32:$src0, f32:$src1), + (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1) + >; + + def : Pat < + (f64 (fcopysign f64:$src0, f64:$src1)), + (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (i32 (EXTRACT_SUBREG $src0, sub0)), sub0), + (BFI_INT (LoadImm32 0x7fffffff), + (i32 (EXTRACT_SUBREG $src0, sub1)), + (i32 (EXTRACT_SUBREG $src1, sub1))), sub1) + >; } // SHA-256 Ma patterns diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 1b041d6bd2b..b3b3a1a0edc 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -72,6 +72,10 @@ public: return (getGeneration() >= EVERGREEN); } + bool hasBFI() const { + return (getGeneration() >= EVERGREEN); + } + bool hasBFM() const { return hasBFE(); } diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index 20654419a8f..e09a0b2f786 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -295,7 +295,7 @@ def : Pat<(i32 (sext_inreg i32:$src, i8)), def : Pat<(i32 (sext_inreg i32:$src, i16)), (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>; -defm : BFIPatterns ; +defm : BFIPatterns ; def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT", [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))], diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 07aba66c45d..43ac47dad29 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -2112,7 +2112,7 @@ def : Pat < (V_MUL_HI_I32 $src0, $src1, (i32 0)) >; -defm : BFIPatterns ; +defm : BFIPatterns ; def : ROTRPattern ; /********** ======================= **********/ diff --git a/test/CodeGen/R600/fcopysign.f32.ll b/test/CodeGen/R600/fcopysign.f32.ll new file mode 100644 index 00000000000..7b4425bed72 --- /dev/null +++ b/test/CodeGen/R600/fcopysign.f32.ll @@ -0,0 +1,50 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + + +declare float @llvm.copysign.f32(float, float) nounwind readnone +declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone + +; Try to identify arg based on higher address. +; FUNC-LABEL: @test_copysign_f32: +; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc +; SI: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]] +; SI-DAG: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb +; SI-DAG: V_MOV_B32_e32 [[VMAG:v[0-9]+]], [[SMAG]] +; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff +; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]] +; SI: BUFFER_STORE_DWORD [[RESULT]], +; SI: S_ENDPGM + +; EG: BFI_INT +define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind { + %result = call float @llvm.copysign.f32(float %mag, float %sign) + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @test_copysign_v2f32: +; SI: S_ENDPGM + +; EG: BFI_INT +; EG: BFI_INT +define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %mag, <2 x float> %sign) nounwind { + %result = call <2 x float> @llvm.copysign.v2f32(<2 x float> %mag, <2 x float> %sign) + store <2 x float> %result, <2 x float> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_copysign_v4f32: +; SI: S_ENDPGM + +; EG: BFI_INT +; EG: BFI_INT +; EG: BFI_INT +; EG: BFI_INT +define void @test_copysign_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %mag, <4 x float> %sign) nounwind { + %result = call <4 x float> @llvm.copysign.v4f32(<4 x float> %mag, <4 x float> %sign) + store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16 + ret void +} + diff --git a/test/CodeGen/R600/fcopysign.f64.ll b/test/CodeGen/R600/fcopysign.f64.ll new file mode 100644 index 00000000000..ea7a6db67f3 --- /dev/null +++ b/test/CodeGen/R600/fcopysign.f64.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare double @llvm.copysign.f64(double, double) nounwind readnone +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone +declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone + +; FUNC-LABEL: @test_copysign_f64: +; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd +; SI: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]] +; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SI-DAG: V_MOV_B32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]] +; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff +; SI: V_BFI_B32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]] +; SI: V_MOV_B32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]] +; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}} +; SI: S_ENDPGM +define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind { + %result = call double @llvm.copysign.f64(double %mag, double %sign) + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_copysign_v2f64: +; SI: S_ENDPGM +define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind { + %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign) + store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @test_copysign_v4f64: +; SI: S_ENDPGM +define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind { + %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign) + store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8 + ret void +}