DAGCombiner: Allow the DAGCombiner to combine multiple FDIVs with the same divisor...

author Hao Liu <Hao.Liu@arm.com>

Fri, 21 Nov 2014 06:39:58 +0000 (06:39 +0000)

committer Hao Liu <Hao.Liu@arm.com>

Fri, 21 Nov 2014 06:39:58 +0000 (06:39 +0000)
author Hao Liu <Hao.Liu@arm.com>
Fri, 21 Nov 2014 06:39:58 +0000 (06:39 +0000)
committer Hao Liu <Hao.Liu@arm.com>
Fri, 21 Nov 2014 06:39:58 +0000 (06:39 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index 882dab40e4199096d9a9536b2488a007bb5272c4..c0f2395eca67a656ad56706dd9084fa8c019f463 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -2652,6 +2652,12 @@ public:
      return SDValue();
    }
  
+  /// Indicate whether this target prefers to combine the given number of FDIVs
+  /// with the same divisor.
+  virtual bool combineRepeatedFPDivisors(unsigned NumUsers) const {
+    return false;
+  }
+
    /// Hooks for building estimates in place of slower divisions and square
    /// roots.
    
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index a1291ed6e491ddc8c2820b236bffe175067b9e88..860423a963bc000de865eae79bcec63dc433166e 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7104,6 +7104,44 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
      }
    }
  
+  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+  // reciprocal.
+  // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+  // Notice that this is not always beneficial. One reason is different target
+  // may have different costs for FDIV and FMUL, so sometimes the cost of two
+  // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
+  // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
+  if (Options.UnsafeFPMath) {
+    // Skip if current node is a reciprocal.
+    if (N0CFP && N0CFP->isExactlyValue(1.0))
+      return SDValue();
+
+    SmallVector<SDNode *, 4> Users;
+    // Find all FDIV users of the same divisor.
+    for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
+                              UE = N1.getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = UI.getUse().getUser();
+      if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
+        Users.push_back(User);
+    }
+
+    if (TLI.combineRepeatedFPDivisors(Users.size())) {
+      SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
+      SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);
+
+      // Dividend / Divisor -> Dividend * Reciprocal
+      for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
+        if ((*I)->getOperand(0) != FPOne) {
+          SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
+                                        (*I)->getOperand(0), Reciprocal);
+          DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
+        }
+      }
+      return SDValue();
+    }
+  }
+
    return SDValue();
  }
  
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 7c94d831cbae419f340b248e04bde0eec230ecd3..70cf7599a395d1cc737e9d7544c5adab4dc8124f 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8732,6 +8732,12 @@ bool AArch64TargetLowering::useLoadStackGuardNode() const {
    return true;
  }
  
+bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+  // reciprocal if there are three or more FDIVs.
+  return NumUsers > 2;
+}
+
  TargetLoweringBase::LegalizeTypeAction
  AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
    MVT SVT = VT.getSimpleVT();
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h

index 2f5708dd8978f5d660c1a88fb2d6edd3dd3180be..c76f6a865dce9027965614f6be44a3f2e55817b6 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -440,6 +440,7 @@ private:
  
    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          std::vector<SDNode *> *Created) const override;
+  bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
  
    ConstraintType
    getConstraintType(const std::string &Constraint) const override;
diff --git a/test/CodeGen/AArch64/fdiv-combine.ll b/test/CodeGen/AArch64/fdiv-combine.ll

new file mode 100644 (file)

index 0000000..389eefd
--- /dev/null
+++ b/test/CodeGen/AArch64/fdiv-combine.ll
@@ -0,0 +1,94 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s
+
+; Following test cases check:
+;   a / D; b / D; c / D;
+;                =>
+;   recip = 1.0 / D; a * recip; b * recip; c * recip;
+define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
+; CHECK-LABEL: three_fdiv_float:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv float %a, %D
+  %div1 = fdiv float %b, %D
+  %div2 = fdiv float %c, %D
+  tail call void @foo_3f(float %div, float %div1, float %div2)
+  ret void
+}
+
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  %div2 = fdiv double %c, %D
+  tail call void @foo_3d(double %div, double %div1, double %div2)
+  ret void
+}
+
+define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+; CHECK-LABEL: three_fdiv_4xfloat:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv <4 x float> %a, %D
+  %div1 = fdiv <4 x float> %b, %D
+  %div2 = fdiv <4 x float> %c, %D
+  tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
+  ret void
+}
+
+define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
+; CHECK-LABEL: three_fdiv_2xdouble:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv <2 x double> %a, %D
+  %div1 = fdiv <2 x double> %b, %D
+  %div2 = fdiv <2 x double> %c, %D
+  tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
+  ret void
+}
+
+; Following test cases check we never combine two FDIVs if neither of them
+; calculates a reciprocal.
+define void @two_fdiv_float(float %D, float %a, float %b) #0 {
+; CHECK-LABEL: two_fdiv_float:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fmul
+  %div = fdiv float %a, %D
+  %div1 = fdiv float %b, %D
+  tail call void @foo_2f(float %div, float %div1)
+  ret void
+}
+
+define void @two_fdiv_double(double %D, double %a, double %b) #0 {
+; CHECK-LABEL: two_fdiv_double:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  tail call void @foo_2d(double %div, double %div1)
+  ret void
+}
+
+declare void @foo_3f(float, float, float)
+declare void @foo_3d(double, double, double)
+declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
+declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
+declare void @foo_2f(float, float)
+declare void @foo_2d(double, double)
+
+attributes #0 = { "unsafe-fp-math"="true" }
author	Hao Liu <Hao.Liu@arm.com>
	Fri, 21 Nov 2014 06:39:58 +0000 (06:39 +0000)
committer	Hao Liu <Hao.Liu@arm.com>
	Fri, 21 Nov 2014 06:39:58 +0000 (06:39 +0000)
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
lib/Target/AArch64/AArch64ISelLowering.h		patch \| blob \| history
test/CodeGen/AArch64/fdiv-combine.ll	[new file with mode: 0644]	patch \| blob