"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
+def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true",
+ "Enable the fre instruction">;
+def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true",
+ "Enable the fres instruction">;
+def FeatureFRSQRTE : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true",
+ "Enable the frsqrte instruction">;
+def FeatureFRSQRTES : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true",
+ "Enable the frsqrtes instruction">;
+def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true",
+ "Assume higher precision reciprocal estimates">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction">;
def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
FeatureBookE]>;
def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
FeatureBookE]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
-def : Processor<"603", G3Itineraries, [Directive603]>;
-def : Processor<"603e", G3Itineraries, [Directive603]>;
-def : Processor<"603ev", G3Itineraries, [Directive603]>;
-def : Processor<"604", G3Itineraries, [Directive604]>;
-def : Processor<"604e", G3Itineraries, [Directive604]>;
-def : Processor<"620", G3Itineraries, [Directive620]>;
-def : Processor<"750", G4Itineraries, [Directive750]>;
-def : Processor<"g3", G3Itineraries, [Directive750]>;
-def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
-def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec]>;
+def : Processor<"603", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603e", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603ev", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604e", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"620", G3Itineraries, [Directive620,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"750", G4Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g3", G3Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
def : Processor<"970", G5Itineraries,
[Directive970, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"g5", G5Itineraries,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureFRES, FeatureFRSQRTE,
Feature64Bit /*, Feature64BitRegs */]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc, FeatureMFOCRF,
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
def : Processor<"a2", PPCA2Itineraries,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
- FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
/*, Feature64BitRegs */]>;
def : Processor<"a2q", PPCA2Itineraries,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
- FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
/*, Feature64BitRegs */, FeatureQPX]>;
def : Processor<"pwr3", G5Itineraries,
- [DirectivePwr3, FeatureAltivec, FeatureMFOCRF,
+ [DirectivePwr3, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
FeatureSTFIWX, Feature64Bit]>;
def : Processor<"pwr4", G5Itineraries,
[DirectivePwr4, FeatureAltivec, FeatureMFOCRF,
- FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+ FeatureFSqrt, FeatureFRES, FeatureFRSQRTE,
+ FeatureSTFIWX, Feature64Bit]>;
def : Processor<"pwr5", G5Itineraries,
[DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
- FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>;
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, Feature64Bit]>;
def : Processor<"pwr5x", G5Itineraries,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
- FeatureFSqrt, FeatureSTFIWX, FeatureFPRND,
- Feature64Bit]>;
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, FeatureFPRND, Feature64Bit]>;
def : Processor<"pwr6", G5Itineraries,
[DirectivePwr6, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- FeatureLFIWAX, FeatureFPRND, Feature64Bit
- /*, Feature64BitRegs */]>;
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"pwr6x", G5Itineraries,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
- FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, Feature64Bit]>;
def : Processor<"pwr7", G5Itineraries,
[DirectivePwr7, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
- FeatureLFIWAX, FeatureFPRND, FeatureFPCVT,
- FeatureISEL, FeaturePOPCNTD, FeatureLDBRX,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : Processor<"ppc64", G5Itineraries,
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget->hasFSQRT()) {
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+
+ if (!Subtarget->hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- }
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ }
+
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setTargetDAGCombine(ISD::BR_CC);
setTargetDAGCombine(ISD::BSWAP);
+ // Use reciprocal estimates.
+ if (TM.Options.UnsafeFPMath) {
+ setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::FSQRT);
+ }
+
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget->isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::FRE: return "PPCISD::FRE";
+ case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
+SDValue PPCTargetLowering::DAGCombineFastRecip(SDNode *N,
+ DAGCombinerInfo &DCI,
+ bool UseOperand) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ if ((N->getValueType(0) == MVT::f32 && PPCSubTarget.hasFRES()) ||
+ (N->getValueType(0) == MVT::f64 && PPCSubTarget.hasFRE()) ||
+ (N->getValueType(0) == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal, we need to find the zero of the function:
+ // F(X) = A X - 1 [which has a zero at X = 1/A]
+ // =>
+ // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+ // does not require additional intermediate precision]
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits.
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ if (N->getValueType(0).getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue FPOne =
+ DAG.getConstantFP(1.0, N->getValueType(0).getScalarType());
+ if (N->getValueType(0).isVector()) {
+ assert(N->getValueType(0).getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0),
+ FPOne, FPOne, FPOne, FPOne);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRE, dl,
+ N->getValueType(0),
+ UseOperand ? N->getOperand(1) :
+ SDValue(N, 0));
+ DCI.AddToWorklist(Est.getNode());
+
+ // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl,
+ N->getValueType(0),
+ UseOperand ? N->getOperand(1) :
+ SDValue(N, 0),
+ Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl,
+ N->getValueType(0), FPOne, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl,
+ N->getValueType(0), Est, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, dl,
+ N->getValueType(0), Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ if ((N->getValueType(0) == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
+ (N->getValueType(0) == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
+ (N->getValueType(0) == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal sqrt, we need to find the zero of the function:
+ // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+ // =>
+ // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+ // As a result, we precompute A/2 prior to the iteration loop.
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits.
+ int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ if (N->getValueType(0).getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue FPThreeHalfs =
+ DAG.getConstantFP(1.5, N->getValueType(0).getScalarType());
+ if (N->getValueType(0).isVector()) {
+ assert(N->getValueType(0).getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPThreeHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0),
+ FPThreeHalfs, FPThreeHalfs,
+ FPThreeHalfs, FPThreeHalfs);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl,
+ N->getValueType(0), N->getOperand(0));
+ DCI.AddToWorklist(Est.getNode());
+
+ // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ FPThreeHalfs, N->getOperand(0));
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ HalfArg = DAG.getNode(ISD::FSUB, dl, N->getValueType(0),
+ HalfArg, N->getOperand(0));
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl,
+ N->getValueType(0), Est, Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl,
+ N->getValueType(0), HalfArg, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl,
+ N->getValueType(0), FPThreeHalfs, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, dl,
+ N->getValueType(0), Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
return N->getOperand(0);
}
break;
+ case ISD::FDIV: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
+ SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(1).getNode(), DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ }
+
+ SDValue RV = DAGCombineFastRecip(N, DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+
+ }
+ break;
+ case ISD::FSQRT: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
+ // reciprocal sqrt.
+ SDValue RV = DAGCombineFastRecipFSQRT(N, DCI);
+ if (RV.getNode() != 0) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAGCombineFastRecip(RV.getNode(), DCI, false);
+ if (RV.getNode() != 0)
+ return RV;
+ }
+ }
+ break;
case ISD::SINT_TO_FP:
if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
/// unsigned integers.
FCTIDUZ, FCTIWUZ,
+ /// Reciprocal estimate instructions (unary FP ops).
+ FRE, FRSQRTE,
+
// VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
// three v4f32 operands and producing a v4f32 result.
VMADDFP, VNMSUBFP,
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue DAGCombineFastRecip(SDNode *N, DAGCombinerInfo &DCI,
+ bool UseOperand = true) const;
+ SDValue DAGCombineFastRecipFSQRT(SDNode *N, DAGCombinerInfo &DCI) const;
};
}
def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
(VPERM $vA, $vB, $vC)>;
+def : Pat<(PPCfre v4f32:$A), (VREFP $A)>;
+def : Pat<(PPCfrsqrte v4f32:$A), (VRSQRTEFP $A)>;
+
// Vector shifts
def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)),
(v16i8 (VSLB $vA, $vB))>;
// PowerPC specific DAG Nodes.
//
+def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;
+def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
+
def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>;
def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
"fneg $frD, $frB", FPGeneral,
[(set f64:$frD, (fneg f64:$frB))]>;
+
+// Reciprocal estimates.
+def FRE : XForm_26<63, 24, (outs F8RC:$frD), (ins F8RC:$frB),
+ "fre $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfre f64:$frB))]>;
+def FRES : XForm_26<59, 24, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fres $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfre f32:$frB))]>;
+def FRSQRTE : XForm_26<63, 26, (outs F8RC:$frD), (ins F8RC:$frB),
+ "frsqrte $frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
+def FRSQRTES : XForm_26<59, 26, (outs F4RC:$frD), (ins F4RC:$frB),
+ "frsqrtes $frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
}
-
// XL-Form instructions. condition register logical ops.
//
def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
+// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
, HasAltivec(false)
, HasQPX(false)
, HasFSQRT(false)
+ , HasFRE(false)
+ , HasFRES(false)
+ , HasFRSQRTE(false)
+ , HasFRSQRTES(false)
+ , HasRecipPrec(false)
, HasSTFIWX(false)
, HasLFIWAX(false)
, HasFPRND(false)
bool HasAltivec;
bool HasQPX;
bool HasFSQRT;
+ bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
+ bool HasRecipPrec;
bool HasSTFIWX;
bool HasLFIWAX;
bool HasFPRND;
// Specific obvious features.
bool hasFSQRT() const { return HasFSQRT; }
+ bool hasFRE() const { return HasFRE; }
+ bool hasFRES() const { return HasFRES; }
+ bool hasFRSQRTE() const { return HasFRSQRTE; }
+ bool hasFRSQRTES() const { return HasFRSQRTES; }
+ bool hasRecipPrec() const { return HasRecipPrec; }
bool hasSTFIWX() const { return HasSTFIWX; }
bool hasLFIWAX() const { return HasLFIWAX; }
bool hasFPRND() const { return HasFPRND; }
--- /dev/null
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck -check-prefix=CHECK-SAFE %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare double @llvm.sqrt.f64(double)
+declare float @llvm.sqrt.f32(float)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+
+define double @foo(double %a, double %b) nounwind {
+entry:
+ %x = call double @llvm.sqrt.f64(double %b)
+ %r = fdiv double %a, %x
+ ret double %r
+
+; CHECK: @foo
+; CHECK: frsqrte
+; CHECK: fnmsub
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: blr
+
+; CHECK-SAFE: @foo
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define float @goo(float %a, float %b) nounwind {
+entry:
+ %x = call float @llvm.sqrt.f32(float %b)
+ %r = fdiv float %a, %x
+ ret float %r
+
+; CHECK: @goo
+; CHECK: frsqrtes
+; CHECK: fnmsubs
+; CHECK: fmuls
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: fmuls
+; CHECK: blr
+
+; CHECK-SAFE: @goo
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+ %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+ %r = fdiv <4 x float> %a, %x
+ ret <4 x float> %r
+
+; CHECK: @hoo
+; CHECK: vrsqrtefp
+
+; CHECK-SAFE: @hoo
+; CHECK-SAFE-NOT: vrsqrtefp
+; CHECK-SAFE: blr
+}
+
+define double @foo2(double %a, double %b) nounwind {
+entry:
+ %r = fdiv double %a, %b
+ ret double %r
+
+; CHECK: @foo2
+; CHECK: fre
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: blr
+
+; CHECK-SAFE: @foo2
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define float @goo2(float %a, float %b) nounwind {
+entry:
+ %r = fdiv float %a, %b
+ ret float %r
+
+; CHECK: @goo2
+; CHECK: fres
+; CHECK: fnmsubs
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: blr
+
+; CHECK-SAFE: @goo2
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+ %r = fdiv <4 x float> %a, %b
+ ret <4 x float> %r
+
+; CHECK: @hoo2
+; CHECK: vrefp
+
+; CHECK-SAFE: @hoo2
+; CHECK-SAFE-NOT: vrefp
+; CHECK-SAFE: blr
+}
+
+define double @foo3(double %a) nounwind {
+entry:
+ %r = call double @llvm.sqrt.f64(double %a)
+ ret double %r
+
+; CHECK: @foo3
+; CHECK: frsqrte
+; CHECK: fnmsub
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmadd
+; CHECK: fmul
+; CHECK: fre
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: fnmsub
+; CHECK: fmadd
+; CHECK: blr
+
+; CHECK-SAFE: @foo3
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: blr
+}
+
+define float @goo3(float %a) nounwind {
+entry:
+ %r = call float @llvm.sqrt.f32(float %a)
+ ret float %r
+
+; CHECK: @goo3
+; CHECK: frsqrtes
+; CHECK: fnmsubs
+; CHECK: fmuls
+; CHECK: fmadds
+; CHECK: fmuls
+; CHECK: fres
+; CHECK: fnmsubs
+; CHECK: fmadds
+; CHECK: blr
+
+; CHECK-SAFE: @goo3
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @hoo3(<4 x float> %a) nounwind {
+entry:
+ %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+ ret <4 x float> %r
+
+; CHECK: @hoo3
+; CHECK: vrsqrtefp
+; CHECK: vrefp
+
+; CHECK-SAFE: @hoo3
+; CHECK-SAFE-NOT: vrsqrtefp
+; CHECK-SAFE: blr
+}
+