From 9dd21f43807c146e72cd5341fdf742a36ce5fa4a Mon Sep 17 00:00:00 2001 From: Olivier Sallenave Date: Tue, 13 Jan 2015 15:06:36 +0000 Subject: [PATCH] Added TLI hook for isFPExtFree. Some of the FMA combine heuristics are now guarded with that hook. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225795 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 8 ++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 133 ++++++++++++----------- lib/Target/PowerPC/PPCISelLowering.cpp | 5 + lib/Target/PowerPC/PPCISelLowering.h | 2 + 4 files changed, 85 insertions(+), 63 deletions(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 542b1fa82ba..8318b2917b8 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -1500,6 +1500,14 @@ public: return isZExtFree(Val.getValueType(), VT2); } + /// Return true if an fpext operation is free (for instance, because + /// single-precision floating-point numbers are implicitly extended to + /// double-precision). + virtual bool isFPExtFree(EVT VT) const { + assert(VT.isFloatingPoint()); + return false; + } + /// Return true if an fneg operation is free to the point where it is never /// worthwhile to replace it with a bitwise operation. virtual bool isFNegFree(EVT VT) const { diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 56ba91fbac5..171ee1e3488 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6957,32 +6957,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); - // More folding opportunities when target permits. - if (TLI.enableAggressiveFMAFusion(VT)) { - // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - N1)); - - // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(0), N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(2).getOperand(0), - N1.getOperand(2).getOperand(1), - N0)); + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. + if (TLI.isFPExtFree(VT)) { - // Remove FP_EXTEND when there is an opportunity to combine. This is - // legal here since extra precision is allowed. - - // fold (fadd (fpext (fmul x, y)), z) -> (fma x, y, z) + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::FMUL) @@ -6993,7 +6972,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N00.getOperand(1)), N1); } - // fold (fadd x, (fpext (fmul y, z)), z) -> (fma y, z, x) + // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x) // Note: Commutes FADD operands. if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); @@ -7005,6 +6984,30 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N10.getOperand(1)), N0); } } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + + // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) + if (N1->getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); + } } return SDValue(); @@ -7099,41 +7102,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { DAG.getNode(ISD::FNEG, dl, VT, N1)); } - // More folding opportunities when target permits. - if (TLI.enableAggressiveFMAFusion(VT)) { + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. + if (TLI.isFPExtFree(VT)) { - // fold (fsub (fma x, y, (fmul u, v)), z) - // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1))); - - // fold (fsub x, (fma y, z, (fmul u, v))) - // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) { - SDValue N20 = N1.getOperand(2).getOperand(0); - SDValue N21 = N1.getOperand(2).getOperand(1); - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1.getOperand(0)), - N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N20), - N21, N0)); - } - - // Remove FP_EXTEND when there is an opportunity to combine. This is - // legal here since extra precision is allowed. - - // fold (fsub (fpext (fmul x, y)), z) -> (fma x, y, (fneg z)) + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::FMUL) @@ -7145,7 +7119,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); } - // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg y), z, x) + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) // Note: Commutes FSUB operands. if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); @@ -7160,7 +7135,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub (fpext (fneg (fmul, x, y))), z) - // -> (fma (fneg x), y, (fneg z)) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::FNEG) { @@ -7178,7 +7153,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub (fneg (fpext (fmul, x, y))), z) - // -> (fma (fneg x), y, (fneg z)) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FNEG) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == ISD::FP_EXTEND) { @@ -7195,6 +7170,38 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y (fma u, v, (fneg z))) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1))); + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N20), + N21, N0)); + } + } } return SDValue(); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 30f08d050da..2157003ef3e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9817,6 +9817,11 @@ bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return TargetLowering::isZExtFree(Val, VT2); } +bool PPCTargetLowering::isFPExtFree(EVT VT) const { + assert(VT.isFloatingPoint()); + return true; +} + bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index db5a3e42d52..b171b165877 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -528,6 +528,8 @@ namespace llvm { bool isZExtFree(SDValue Val, EVT VT2) const override; + bool isFPExtFree(EVT VT) const override; + /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool shouldConvertConstantLoadToIntImm(const APInt &Imm, -- 2.34.1