From: Nate Begeman Date: Wed, 14 Dec 2005 22:54:33 +0000 (+0000) Subject: Use the new predicate support that Evan Cheng added to remove some code X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=a07da92624219599e6569460b3b56b49d60a4b46;p=oota-llvm.git Use the new predicate support that Evan Cheng added to remove some code from the DAGToDAG cpp file. This adds pattern support for vector and scalar fma, which passes test/Regression/CodeGen/PowerPC/fma.ll, and does the right thing in the presence of -disable-excess-fp-precision. Allows us to match: void %foo(<4 x float> * %a) { entry: %tmp1 = load <4 x float> * %a; %tmp2 = mul <4 x float> %tmp1, %tmp1 %tmp3 = add <4 x float> %tmp2, %tmp1 store <4 x float> %tmp3, <4 x float> *%a ret void } As: _foo: li r2, 0 lvx v0, r2, r3 vmaddfp v0, v0, v0, v0 stvx v0, r2, r3 blr Or, with llc -disable-excess-fp-precision, _foo: li r2, 0 lvx v0, r2, r3 vxor v1, v1, v1 vmaddfp v1, v0, v0, v1 vaddfp v0, v1, v0 stvx v0, r2, r3 blr git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@24719 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 01d089d9b91..f666f232556 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -885,53 +885,6 @@ SDOperand PPCDAGToDAGISel::Select(SDOperand Op) { CurDAG->getTargetFrameIndex(FI, MVT::i32), getI32Imm(0)); } - case ISD::FADD: { - MVT::ValueType Ty = N->getValueType(0); - if (!NoExcessFPPrecision) { // Match FMA ops - if (N->getOperand(0).getOpcode() == ISD::FMUL && - N->getOperand(0).Val->hasOneUse()) { - ++FusedFP; // Statistic - return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? 
PPC::FMADD :PPC::FMADDS, - Ty, Select(N->getOperand(0).getOperand(0)), - Select(N->getOperand(0).getOperand(1)), - Select(N->getOperand(1))); - } else if (N->getOperand(1).getOpcode() == ISD::FMUL && - N->getOperand(1).hasOneUse()) { - ++FusedFP; // Statistic - return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? PPC::FMADD :PPC::FMADDS, - Ty, Select(N->getOperand(1).getOperand(0)), - Select(N->getOperand(1).getOperand(1)), - Select(N->getOperand(0))); - } - } - - // Other cases are autogenerated. - break; - } - case ISD::FSUB: { - MVT::ValueType Ty = N->getValueType(0); - - if (!NoExcessFPPrecision) { // Match FMA ops - if (N->getOperand(0).getOpcode() == ISD::FMUL && - N->getOperand(0).Val->hasOneUse()) { - ++FusedFP; // Statistic - return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? PPC::FMSUB:PPC::FMSUBS, - Ty, Select(N->getOperand(0).getOperand(0)), - Select(N->getOperand(0).getOperand(1)), - Select(N->getOperand(1))); - } else if (N->getOperand(1).getOpcode() == ISD::FMUL && - N->getOperand(1).Val->hasOneUse()) { - ++FusedFP; // Statistic - return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ?PPC::FNMSUB:PPC::FNMSUBS, - Ty, Select(N->getOperand(1).getOperand(0)), - Select(N->getOperand(1).getOperand(1)), - Select(N->getOperand(0))); - } - } - - // Other cases are autogenerated. - break; - } case ISD::SDIV: { // FIXME: since this depends on the setting of the carry flag from the srawi // we should really be making notes about that for the scheduler. diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 04011e8513f..4ec8fe3346d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -168,7 +168,7 @@ def crbitm: Operand { //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. 
-def FPContractions : Predicate<"!NoExcessFPPrecision">; +def FPContractions : Predicate<"!NoExcessFPPrecision">; //===----------------------------------------------------------------------===// // PowerPC Instruction Definitions. @@ -746,22 +746,26 @@ def FNMADD : AForm_1<63, 31, (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>; + F8RC:$FRB)))]>, + Requires<[FPContractions]>; def FNMADDS : AForm_1<59, 31, (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>; + F4RC:$FRB)))]>, + Requires<[FPContractions]>; def FNMSUB : AForm_1<63, 30, (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>; + F8RC:$FRB)))]>, + Requires<[FPContractions]>; def FNMSUBS : AForm_1<59, 30, (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>; + F4RC:$FRB)))]>, + Requires<[FPContractions]>; // FSEL is artificially split into 4 and 8-byte forms for the result. 
To avoid // having 4 of these, force the comparison to always be an 8-byte double (code // should use an FMRSD if the input comparison value really wants to be a float) @@ -848,12 +852,14 @@ def RLDICR : MDForm_1<30, 1, def VMADDFP : VAForm_1<46, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC), "vmaddfp $vD, $vA, $vC, $vB", VecFP, [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC), - VRRC:$vB))]>; + VRRC:$vB))]>, + Requires<[FPContractions]>; def VNMSUBFP: VAForm_1<47, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC), - "vnmsubfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fneg (fsub (fmul VRRC:$vA, - VRRC:$vC), - VRRC:$vB)))]>; + "vnmsubfp $vD, $vA, $vC, $vB", VecFP, + [(set VRRC:$vD, (fneg (fsub (fmul VRRC:$vA, + VRRC:$vC), + VRRC:$vB)))]>, + Requires<[FPContractions]>; // VX-Form instructions. AltiVec arithmetic ops. def VADDFP : VXForm_1<10, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB), @@ -971,6 +977,14 @@ def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)), def : Pat<(fmul VRRC:$vA, VRRC:$vB), (VMADDFP VRRC:$vA, (V_SET0), VRRC:$vB)>; +// Fused negative multiply subtract, alternate pattern +def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)), + (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>, + Requires<[FPContractions]>; +def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)), + (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>, + Requires<[FPContractions]>; + // Fused multiply add and multiply sub for packed float. These are represented // separately from the real instructions above, for operations that must have // the additional precision, such as Newton-Rhapson (used by divide, sqrt)