From 1296bb3ba673268c0a69e45b758548625d3f4026 Mon Sep 17 00:00:00 2001
From: Hao Liu <Hao.Liu@arm.com>
Date: Tue, 3 Dec 2013 06:06:55 +0000
Subject: [PATCH] [AArch64]Add missing floating point convert, round and misc
 intrinsics.

E.g. int64x1_t vcvt_s64_f64(float64x1_t a) -> FCVTZS Dd, Dn

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196210 91177308-0d34-0410-b5e6-96231b3b80d8
---
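Note (not part of the patch): a usage sketch of the C-level scalar conversions this change makes selectable. The vcvt* spellings follow the AArch64 ACLE arm_neon.h; the wrapper names are illustrative only. Each wrapper should now compile to the single scalar instruction noted in its comment.

#include <arm_neon.h>

int64x1_t   to_s64(float64x1_t v)     { return vcvt_s64_f64(v);  } // FCVTZS Dd, Dn (round toward zero)
uint64x1_t  to_u64(float64x1_t v)     { return vcvt_u64_f64(v);  } // FCVTZU Dd, Dn
int64x1_t   to_s64_rne(float64x1_t v) { return vcvtn_s64_f64(v); } // FCVTNS Dd, Dn (to nearest, ties to even)
int64x1_t   to_s64_away(float64x1_t v){ return vcvta_s64_f64(v); } // FCVTAS Dd, Dn (to nearest, ties away)
float64x1_t from_s64(int64x1_t v)     { return vcvt_f64_s64(v);  } // SCVTF Dd, Dn
float64x1_t from_u64(uint64x1_t v)    { return vcvt_f64_u64(v);  } // UCVTF Dd, Dn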
 lib/Target/AArch64/AArch64ISelLowering.cpp |   6 +
 lib/Target/AArch64/AArch64InstrNEON.td     |  55 +++++-
 test/CodeGen/AArch64/neon-misc.ll          | 202 ++++++++++++++++++++-
 test/CodeGen/AArch64/neon-simd-shift.ll    |  32 ++++
 4 files changed, 287 insertions(+), 8 deletions(-)

diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5d20a96ff02..f4b5af4c2c6 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -341,26 +341,32 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
 
     setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+    setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal);
     setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
 
     setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+    setOperationAction(ISD::FCEIL, MVT::v1f64, Legal);
     setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
 
     setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal);
     setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
 
     setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
     setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::FRINT, MVT::v1f64, Legal);
     setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
 
     setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal);
     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal);
     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
 
     setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+    setOperationAction(ISD::FROUND, MVT::v1f64, Legal);
     setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
   }
 }
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index b3a2b74bca5..42eb868c10b 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -4745,6 +4745,29 @@ defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxs,
 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxu,
                                                   FCVTZU_Nssi, FCVTZU_Nddi>;
 
+// Patterns For Convert Instructions Between v1f64 and v1i64
+class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
+                                             Instruction INST>
+    : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+          (INST FPR64:$Rn, imm:$Imm)>;
+
+class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
+                                             Instruction INST>
+    : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+          (INST FPR64:$Rn, imm:$Imm)>;
+
+def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
+                                             SCVTF_Nddi>;
+
+def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
+                                             UCVTF_Nddi>;
+
+def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
+                                             FCVTZS_Nddi>;
+
+def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
+                                             FCVTZU_Nddi>;
+
 // Scalar Integer Add
 let isCommutable = 1 in {
 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
@@ -4814,6 +4837,8 @@ defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
                                          FRSQRTSddd>;
 
+def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
+
 // Patterns to match llvm.aarch64.* intrinsic for
 // Scalar Floating-point Multiply Extended,
 multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
@@ -4926,6 +4951,21 @@ defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
                                                   FCVTZUss, FCVTZUdd>;
 
+// Patterns For Convert Instructions Between v1f64 and v1i64
+class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
+                                              Instruction INST>
+    : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
+                                              Instruction INST>
+    : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
+def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;
+
+def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
+def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;
+
 // Scalar Floating-point Reciprocal Estimate
 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
@@ -4948,6 +4988,18 @@ defm FRSQRTE : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
                                              FRSQRTEss, FRSQRTEdd>;
 
+// Scalar Floating-point Round
+class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
+    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
+def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
+def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
+def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
+def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
+def : Neon_ScalarFloatRound_pattern<fround, FRINTAdd>;
+def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
+
 // Scalar Integer Compare
 
 // Scalar Compare Bitwise Equal
@@ -8347,8 +8399,7 @@ defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
                                     int_arm_neon_vrecpe>;
 defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
                                      int_arm_neon_vrsqrte>;
-defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111,
-                                   int_aarch64_neon_fsqrt>;
+defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
 
 multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U, bits<5> opcode,
                                SDPatternOperator Neon_Op> {
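Note (not part of the patch): the Neon_ScalarFloatRound_pattern defs above map the standard rounding nodes onto the scalar d-register FRINT* instructions. In C that corresponds to the following sketch, with intrinsic spellings per the AArch64 ACLE arm_neon.h and illustrative wrapper names:

#include <arm_neon.h>

float64x1_t r_even(float64x1_t v)  { return vrndn_f64(v); } // FRINTN: to nearest, ties to even
float64x1_t r_away(float64x1_t v)  { return vrnda_f64(v); } // FRINTA: to nearest, ties away (llvm.round)
float64x1_t r_up(float64x1_t v)    { return vrndp_f64(v); } // FRINTP: toward +Inf (llvm.ceil)
float64x1_t r_down(float64x1_t v)  { return vrndm_f64(v); } // FRINTM: toward -Inf (llvm.floor)
float64x1_t r_zero(float64x1_t v)  { return vrnd_f64(v);  } // FRINTZ: toward zero (llvm.trunc)
float64x1_t r_cur(float64x1_t v)   { return vrndx_f64(v); } // FRINTX: current mode (llvm.rint)
float64x1_t r_quiet(float64x1_t v) { return vrndi_f64(v); } // FRINTI: current mode, no inexact trap (llvm.nearbyint)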
diff --git a/test/CodeGen/AArch64/neon-misc.ll b/test/CodeGen/AArch64/neon-misc.ll
index 3fd9a500f48..9660bf2c7a3 100644
--- a/test/CodeGen/AArch64/neon-misc.ll
+++ b/test/CodeGen/AArch64/neon-misc.ll
@@ -1274,19 +1274,19 @@ define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
 
 define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 {
 ; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
-  %vsqrt1.i = tail call <2 x float> @llvm.aarch64.neon.fsqrt.v2f32(<2 x float> %a) #4
+  %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4
   ret <2 x float> %vsqrt1.i
 }
 
 define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 {
 ; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
-  %vsqrt1.i = tail call <4 x float> @llvm.aarch64.neon.fsqrt.v4f32(<4 x float> %a) #4
+  %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4
   ret <4 x float> %vsqrt1.i
 }
 
 define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 {
 ; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
-  %vsqrt1.i = tail call <2 x double> @llvm.aarch64.neon.fsqrt.v2f64(<2 x double> %a) #4
+  %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #4
   ret <2 x double> %vsqrt1.i
 }
 
@@ -1326,11 +1326,11 @@ define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 {
   ret <2 x double> %vcvt.i
 }
 
-declare <2 x double> @llvm.aarch64.neon.fsqrt.v2f64(<2 x double>) #2
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2
 
-declare <4 x float> @llvm.aarch64.neon.fsqrt.v4f32(<4 x float>) #2
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2
 
-declare <2 x float> @llvm.aarch64.neon.fsqrt.v2f32(<2 x float>) #2
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2
 
 declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) #2
 
@@ -1607,3 +1607,193 @@ declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) #2
 
 declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) #2
 
+define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_s64_f64
+; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fptosi <1 x double> %a to <1 x i64>
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_u64_f64
+; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fptoui <1 x double> %a to <1 x i64>
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtn_s64_f64
+; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtn_u64_f64
+; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtp_s64_f64
+; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtp_u64_f64
+; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtm_s64_f64
+; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvtm_u64_f64
+; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvta_s64_f64
+; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvta_u64_f64
+; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
+  ret <1 x i64> %1
+}
+
+define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_f64_s64
+; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = sitofp <1 x i64> %a to <1 x double>
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_f64_u64
+; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = uitofp <1 x i64> %a to <1 x double>
+  ret <1 x double> %1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>)
+declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>)
+
+define <1 x double> @test_vrndn_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrndn_f64
+; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrnda_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrnda_f64
+; CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.round.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndp_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrndp_f64
+; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndm_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrndm_f64
+; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndx_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrndx_f64
+; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrnd_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrnd_f64
+; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrndi_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrndi_f64
+; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
+declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
+declare <1 x double> @llvm.rint.v1f64(<1 x double>)
+declare <1 x double> @llvm.floor.v1f64(<1 x double>)
+declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
+declare <1 x double> @llvm.round.v1f64(<1 x double>)
+declare <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double>)
+
+define <1 x double> @test_vrsqrte_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrsqrte_f64
+; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrecpe_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vrecpe_f64
+; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vsqrt_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vsqrt_f64
+; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vrecps_f64
+; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vrsqrts_f64
+; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.sqrt.v1f64(<1 x double>)
+declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>)
+declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>)
\ No newline at end of file
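Note (not part of the patch): FRECPE/FRSQRTE and FRECPS/FRSQRTS are estimate and refinement-step instructions, not full-precision operations. A hypothetical sketch of one Newton-Raphson refinement, assuming the ACLE intrinsics vrsqrte_f64, vrsqrts_f64, and vmul_f64 corresponding to the tests above:

#include <arm_neon.h>

// frsqrts(a, b) computes (3 - a*b) / 2, so y * frsqrts(x*y, y) is one
// Newton-Raphson step of y toward 1/sqrt(x).
float64x1_t rsqrt_one_step(float64x1_t x) {
  float64x1_t y = vrsqrte_f64(x);                   // FRSQRTE: initial estimate
  y = vmul_f64(y, vrsqrts_f64(vmul_f64(x, y), y));  // FRSQRTS: refinement step
  return y;
}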
diff --git a/test/CodeGen/AArch64/neon-simd-shift.ll b/test/CodeGen/AArch64/neon-simd-shift.ll
index 19d1b219646..fd762656e56 100644
--- a/test/CodeGen/AArch64/neon-simd-shift.ll
+++ b/test/CodeGen/AArch64/neon-simd-shift.ll
@@ -1522,3 +1522,35 @@ declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
 
 declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
 
+define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_n_s64_f64
+; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
+  ret <1 x i64> %1
+}
+
+define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vcvt_n_u64_f64
+; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
+  ret <1 x i64> %1
+}
+
+define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_n_f64_s64
+; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
+; CHECK-LABEL: test_vcvt_n_f64_u64
+; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
+  %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
+  ret <1 x double> %1
+}
+
+declare <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
+declare <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
+declare <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
+declare <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)
\ No newline at end of file
-- 
2.34.1
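Note (not part of the patch): in the fixed-point forms above, the immediate gives the number of fraction bits. A usage sketch, with intrinsic spellings per the AArch64 ACLE arm_neon.h and illustrative wrapper names:

#include <arm_neon.h>

// Convert between double and Q0.64 fixed-point (64 fraction bits).
int64x1_t   to_fixed_q64(float64x1_t v)  { return vcvt_n_s64_f64(v, 64); } // FCVTZS Dd, Dn, #64
float64x1_t from_fixed_q64(uint64x1_t v) { return vcvt_n_f64_u64(v, 64); } // UCVTF Dd, Dn, #64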