Convert the PPC backend to use the new FMA infrastructure.

author Hal Finkel <hfinkel@anl.gov>

Fri, 22 Jun 2012 00:49:52 +0000 (00:49 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Fri, 22 Jun 2012 00:49:52 +0000 (00:49 +0000)
author Hal Finkel <hfinkel@anl.gov>
Fri, 22 Jun 2012 00:49:52 +0000 (00:49 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Fri, 22 Jun 2012 00:49:52 +0000 (00:49 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index dc50d860a87d84269886bc70128b43e415e7c68f..e88c3fdbb505c94f7aec880a5a2a7a6c04b3635d 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -132,12 +132,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FPOW , MVT::f64, Expand);
-  setOperationAction(ISD::FMA  , MVT::f64, Expand);
+  setOperationAction(ISD::FMA  , MVT::f64, Legal);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);
    setOperationAction(ISD::FPOW , MVT::f32, Expand);
-  setOperationAction(ISD::FMA  , MVT::f32, Expand);
+  setOperationAction(ISD::FMA  , MVT::f32, Legal);
  
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  
@@ -378,6 +378,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
      addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
  
      setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);
      setOperationAction(ISD::MUL, MVT::v8i16, Custom);
      setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -5876,6 +5877,26 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
    }
  }
  
+/// isFMAFasterThanMulAndAdd - Tell the caller whether a single fused
+/// multiply-add should be preferred over a separate multiply followed by an
+/// add for the value type VT.
+///
+/// When this returns true (and FMA is legal), fmuladd intrinsics are expanded
+/// to FMAs; otherwise they are expanded to mul + add.
+bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+  // Extended (non-simple) value types are never reported as profitable.
+  if (!VT.isSimple())
+    return false;
+
+  // Only the simple types f32, f64 and v4f32 are accepted; every other
+  // simple type yields false.
+  const MVT::SimpleValueType Ty = VT.getSimpleVT().SimpleTy;
+  const bool IsScalarFP = Ty == MVT::f32 || Ty == MVT::f64;
+  const bool IsVectorFP = Ty == MVT::v4f32;
+
+  return IsScalarFP || IsVectorFP;
+}
+
  Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
    if (DisableILPPref)
      return TargetLowering::getSchedulingPreference(N);
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index 973800b461212837f8e9cc53cd95e6afc1ae6258..b0a013b4b4cf9859c2051bcd92a76f6f084bdf66 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -366,6 +366,12 @@ namespace llvm {
                          bool IsZeroVal, bool MemcpyStrSrc,
                          MachineFunction &MF) const;
  
+    /// isFMAFasterThanMulAndAdd - Return true if a fused multiply-add (FMA)
+    /// is faster than a separate mul followed by an add for type VT. fmuladd
+    /// intrinsics are expanded to FMAs when this returns true (and FMAs are
+    /// legal); otherwise fmuladd is expanded to mul + add.
+    virtual bool isFMAFasterThanMulAndAdd(EVT VT) const;
+
    private:
      SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
      SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td

index 6c0f3d3f06e5fa37f8b66938619f9df616d5b4b8..b0b842328196759091826494383df27716a17389 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -274,15 +274,11 @@ let PPC970_Unit = 5 in {  // VALU Operations.
  // VA-Form instructions.  3-input AltiVec ops.
  def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
                         "vmaddfp $vD, $vA, $vC, $vB", VecFP,
-                       [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
-                                             VRRC:$vB))]>,
-                       Requires<[FPContractions]>;
+                       [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>;
  def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
                         "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
-                       [(set VRRC:$vD, (fsub V_immneg0,
-                                             (fsub (fmul VRRC:$vA, VRRC:$vC),
-                                                   VRRC:$vB)))]>,
-                       Requires<[FPContractions]>;
+                       [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC,
+                                                  (fneg VRRC:$vB))))]>; 
  
  def VMHADDSHS  : VA1a_Int<32, "vmhaddshs",  int_ppc_altivec_vmhaddshs>;
  def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td

index 543038335e9d455eb7db43dc886b837f1f1ebd08..29f7875a7927f73dbcad8e142fde43435ca808fe 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -353,7 +353,6 @@ def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
  
  //===----------------------------------------------------------------------===//
  // PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"TM.Options.AllowExcessFPPrecision">;
  def In32BitMode  : Predicate<"!PPCSubTarget.isPPC64()">;
  def In64BitMode  : Predicate<"PPCSubTarget.isPPC64()">;
  def IsBookE  : Predicate<"PPCSubTarget.isBookE()">;
@@ -1312,51 +1311,43 @@ let Uses = [RM] in {
    def FMADD : AForm_1<63, 29, 
                        (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
                        "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
-                      [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC),
-                                             F8RC:$FRB))]>,
-                      Requires<[FPContractions]>;
+                      [(set F8RC:$FRT,
+                            (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>;
    def FMADDS : AForm_1<59, 29,
                        (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
                        "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                      [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
-                                             F4RC:$FRB))]>,
-                      Requires<[FPContractions]>;
+                      [(set F4RC:$FRT,
+                            (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>;
    def FMSUB : AForm_1<63, 28,
                        (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
                        "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
-                      [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC),
-                                             F8RC:$FRB))]>,
-                      Requires<[FPContractions]>;
+                      [(set F8RC:$FRT,
+                            (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>;
    def FMSUBS : AForm_1<59, 28,
                        (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
                        "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                      [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC),
-                                             F4RC:$FRB))]>,
-                      Requires<[FPContractions]>;
+                      [(set F4RC:$FRT,
+                            (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>;
    def FNMADD : AForm_1<63, 31,
                        (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
                        "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
-                      [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
-                                                   F8RC:$FRB)))]>,
-                      Requires<[FPContractions]>;
+                      [(set F8RC:$FRT,
+                            (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>;
    def FNMADDS : AForm_1<59, 31,
                        (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
                        "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                      [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
-                                                   F4RC:$FRB)))]>,
-                      Requires<[FPContractions]>;
+                      [(set F4RC:$FRT,
+                            (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>;
    def FNMSUB : AForm_1<63, 30,
                        (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
                        "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
-                      [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
-                                                   F8RC:$FRB)))]>,
-                      Requires<[FPContractions]>;
+                      [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC,
+                                                  (fneg F8RC:$FRB))))]>;
    def FNMSUBS : AForm_1<59, 30,
                        (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
                        "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                      [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
-                                                   F4RC:$FRB)))]>,
-                      Requires<[FPContractions]>;
+                      [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC,
+                                                  (fneg F4RC:$FRB))))]>;
  }
  // FSEL is artificially split into 4 and 8-byte forms for the result.  To avoid
  // having 4 of these, force the comparison to always be an 8-byte double (code
@@ -1517,14 +1508,6 @@ def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)),
  def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)),
            (ADDIS GPRC:$in, tblockaddress:$g)>;
  
-// Fused negative multiply subtract, alternate pattern
-def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
-          (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
-          Requires<[FPContractions]>;
-def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
-          (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
-          Requires<[FPContractions]>;
-
  // Standard shifts.  These are represented separately from the real shifts above
  // so that we can distinguish between shifts that allow 5-bit and 6-bit shift
  // amounts.
author	Hal Finkel <hfinkel@anl.gov>
	Fri, 22 Jun 2012 00:49:52 +0000 (00:49 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Fri, 22 Jun 2012 00:49:52 +0000 (00:49 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
lib/Target/PowerPC/PPCInstrAltivec.td		patch \| blob \| history
lib/Target/PowerPC/PPCInstrInfo.td		patch \| blob \| history