From 14b4c0358055c3de5ab404e953ba8c8a678c61ae Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 11 Apr 2012 06:59:47 +0000 Subject: [PATCH] Add more fused mul+add/sub patterns. rdar://10139676 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154484 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 10 ++++- lib/Target/ARM/ARMInstrVFP.td | 44 ++++++++++++++++++++- test/CodeGen/ARM/fusedMAC.ll | 70 ++++++++++++++++++++++++++-------- 3 files changed, 104 insertions(+), 20 deletions(-) diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index ea16dcd8e38..c2b8ce466da 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4133,12 +4133,18 @@ def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", Requires<[HasVFP4,UseFusedMAC]>; // Match @llvm.fma.* intrinsics -def : Pat<(fma (v2f32 DPR:$src1), (v2f32 DPR:$Vn), (v2f32 DPR:$Vm)), +def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)), (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, Requires<[HasVFP4]>; -def : Pat<(fma (v4f32 QPR:$src1), (v4f32 QPR:$Vn), (v4f32 QPR:$Vm)), +def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)), (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, Requires<[HasVFP4]>; +def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)), + (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)), + (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; // Vector Subtract Operations. 
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 28cf460f874..3600b889d6f 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1081,10 +1081,10 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics -def : Pat<(fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm)), +def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)), (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4]>; -def : Pat<(fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm)), +def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)), (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1114,6 +1114,22 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; +// Match @llvm.fma.* intrinsics +// (fma (fneg x), y, z) -> (vfms x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fneg (fma x, (fneg y), z)) -> (vfms x, y, z) +def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; + def VFNMAD : ADbI<0b11101, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm", @@ -1141,12 +1157,20 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics +// (fneg (fma x, y, z)) -> (vfnma x, y, z) def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4]>; def : Pat<(fneg 
(fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; +// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))), + (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))), + (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; def VFNMSD : ADbI<0b11101, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1173,6 +1197,22 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; +// Match @llvm.fma.* intrinsics +// (fneg (fma (fneg x), y, z)) -> (vfnms x, y, z) +def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fma x, (fneg y), z) -> (vfnms x, y, z) +def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; + //===----------------------------------------------------------------------===// // FP Conditional moves. 
// diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll index b5398c7a587..a8b3999d2bf 100644 --- a/test/CodeGen/ARM/fusedMAC.ll +++ b/test/CodeGen/ARM/fusedMAC.ll @@ -103,43 +103,81 @@ define float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp { entry: ; CHECK: test_fma_f32 ; CHECK: vfma.f32 - %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone - ret float %call + %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone + ret float %tmp1 } define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp { entry: ; CHECK: test_fma_f64 ; CHECK: vfma.f64 - %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone - ret double %call + %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone + ret double %tmp1 } define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp { entry: ; CHECK: test_fma_v2f32 ; CHECK: vfma.f32 - %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind - ret <2 x float> %0 + %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind + ret <2 x float> %tmp1 } -define float @test_fnma_f32(float %a, float %b, float %c) nounwind readnone ssp { +define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp { entry: -; CHECK: test_fnma_f32 -; CHECK: vfnma.f32 - %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone - %tmp1 = fsub float -0.0, %call - %tmp2 = fsub float %tmp1, %c - ret float %tmp2 +; CHECK: test_fms_f64 +; CHECK: vfms.f64 + %tmp1 = fsub double -0.0, %a + %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone + ret double %tmp2 +} + +define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; 
CHECK: test_fms_f64_2 +; CHECK: vfms.f64 + %tmp1 = fsub double -0.0, %b + %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone + %tmp3 = fsub double -0.0, %tmp2 + ret double %tmp3 +} + +define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fnms_f64 +; CHECK: vfnms.f64 + %tmp1 = fsub double -0.0, %a + %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone + %tmp3 = fsub double -0.0, %tmp2 + ret double %tmp3 +} + +define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fnms_f64_2 +; CHECK: vfnms.f64 + %tmp1 = fsub double -0.0, %b + %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone + ret double %tmp2 } define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp { entry: ; CHECK: test_fnma_f64 ; CHECK: vfnma.f64 - %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone - %tmp = fsub double -0.0, %call - ret double %tmp + %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone + %tmp2 = fsub double -0.0, %tmp1 + ret double %tmp2 +} + +define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp { +entry: +; CHECK: test_fnma_f64_2 +; CHECK: vfnma.f64 + %tmp1 = fsub double -0.0, %a + %tmp2 = fsub double -0.0, %c + %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone + ret double %tmp3 } declare float @llvm.fma.f32(float, float, float) nounwind readnone -- 2.34.1