[PowerPC] Make use of the TargetRecip system

author Hal Finkel <hfinkel@anl.gov>

Sun, 12 Jul 2015 02:33:57 +0000 (02:33 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Sun, 12 Jul 2015 02:33:57 +0000 (02:33 +0000)
author Hal Finkel <hfinkel@anl.gov>
Sun, 12 Jul 2015 02:33:57 +0000 (02:33 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Sun, 12 Jul 2015 02:33:57 +0000 (02:33 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index ceda29103773231f9aadf9e67313f2656ead2f7f..0ed9b051ffedaa0c9483de53db18bade0c9049bd 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9067,6 +9067,19 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  // Target Optimization Hooks
  //===----------------------------------------------------------------------===//
  
+static std::string getRecipOp(const char *Base, EVT VT) {
+  std::string RecipOp(Base);
+  if (VT.getScalarType() == MVT::f64)
+    RecipOp += "d";
+  else
+    RecipOp += "f";
+
+  if (VT.isVector())
+    RecipOp = "vec-" + RecipOp;
+
+  return RecipOp;
+}
+
  SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
                                              DAGCombinerInfo &DCI,
                                              unsigned &RefinementSteps,
@@ -9078,13 +9091,12 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
        (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
        (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
        (VT == MVT::v4f64 && Subtarget.hasQPX())) {
-    // Convergence is quadratic, so we essentially double the number of digits
-    // correct after every iteration. For both FRE and FRSQRTE, the minimum
-    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
-    // 2^-14. IEEE float has 23 digits and double has 52 digits.
-    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
-    if (VT.getScalarType() == MVT::f64)
-      ++RefinementSteps;
+    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+    std::string RecipOp = getRecipOp("sqrt", VT);
+    if (!Recips.isEnabled(RecipOp))
+      return SDValue();
+
+    RefinementSteps = Recips.getRefinementSteps(RecipOp);
      UseOneConstNR = true;
      return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
    }
@@ -9101,13 +9113,12 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
        (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
        (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
        (VT == MVT::v4f64 && Subtarget.hasQPX())) {
-    // Convergence is quadratic, so we essentially double the number of digits
-    // correct after every iteration. For both FRE and FRSQRTE, the minimum
-    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
-    // 2^-14. IEEE float has 23 digits and double has 52 digits.
-    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
-    if (VT.getScalarType() == MVT::f64)
-      ++RefinementSteps;
+    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+    std::string RecipOp = getRecipOp("div", VT);
+    if (!Recips.isEnabled(RecipOp))
+      return SDValue();
+
+    RefinementSteps = Recips.getRefinementSteps(RecipOp);
      return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
    }
    return SDValue();
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp

index 074bc870751ac75113628b4efb72cd6b83789891..1daf244fed448818ff541c50371077668df4b47d 100644 (file)
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -172,7 +172,26 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
      : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
                          computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
        TLOF(createTLOF(getTargetTriple())),
-      TargetABI(computeTargetABI(TT, Options)) {
+      TargetABI(computeTargetABI(TT, Options)),
+      Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
+
+  // For the estimates, convergence is quadratic, so we essentially double the
+  // number of digits correct after every iteration. For both FRE and FRSQRTE,
+  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+  unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
+           RefinementSteps64 = RefinementSteps + 1;
+
+  this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
+  this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
+  this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
+  this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
+
+  this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
+  this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
+  this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
+  this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
+
    initAsmInfo();
  }
  
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h

index 5c0f7e629a69c5912c9700148b9d33e6f46cf8eb..6496339519a1da089edfd58638ee221dc8c0bd1f 100644 (file)
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -29,6 +29,8 @@ public:
  private:
    std::unique_ptr<TargetLoweringObjectFile> TLOF;
    PPCABI TargetABI;
+  PPCSubtarget Subtarget;
+
    mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
  
  public:
diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll

index cd77548b281bf499366bf7218efcd32fb6737826..41dcb0f5b3fcb9c77b759e6b1da8a2f46aed0837 100644 (file)
--- a/test/CodeGen/PowerPC/recipest.ll
+++ b/test/CodeGen/PowerPC/recipest.ll
@@ -1,4 +1,5 @@
  ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx -recip=sqrtf:0,sqrtd:0 | FileCheck %s -check-prefix=CHECK-NONR
  ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck -check-prefix=CHECK-SAFE %s
  target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
  target triple = "powerpc64-unknown-linux-gnu"
@@ -24,6 +25,13 @@ define double @foo(double %a, double %b) nounwind {
  ; CHECK-NEXT: fmul
  ; CHECK: blr
  
+; CHECK-NONR: @foo
+; CHECK-NONR: frsqrte
+; CHECK-NONR-NOT: fmadd
+; CHECK-NONR: fmul
+; CHECK-NONR-NOT: fmadd
+; CHECK-NONR: blr
+
  ; CHECK-SAFE: @foo
  ; CHECK-SAFE: fsqrt
  ; CHECK-SAFE: fdiv
@@ -90,6 +98,13 @@ define float @goo(float %a, float %b) nounwind {
  ; CHECK-NEXT: fmuls
  ; CHECK-NEXT: blr
  
+; CHECK-NONR: @goo
+; CHECK-NONR: frsqrtes
+; CHECK-NONR-NOT: fmadds
+; CHECK-NONR: fmuls
+; CHECK-NONR-NOT: fmadds
+; CHECK-NONR: blr
+
  ; CHECK-SAFE: @goo
  ; CHECK-SAFE: fsqrts
  ; CHECK-SAFE: fdivs
author	Hal Finkel <hfinkel@anl.gov>
	Sun, 12 Jul 2015 02:33:57 +0000 (02:33 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Sun, 12 Jul 2015 02:33:57 +0000 (02:33 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch | blob | history
lib/Target/PowerPC/PPCTargetMachine.cpp		patch | blob | history
lib/Target/PowerPC/PPCTargetMachine.h		patch | blob | history
test/CodeGen/PowerPC/recipest.ll		patch | blob | history