From 815d6dac1ecc6ad661cebb308f9c06583fcd3cf0 Mon Sep 17 00:00:00 2001
From: Nate Begeman <natebegeman@mac.com>
Date: Wed, 6 Apr 2005 00:25:27 +0000
Subject: [PATCH] Add support for MULHS and MULHU nodes Have LegalizeDAG handle
 SREM and UREM for us Codegen SDIV and UDIV by constant as a multiply by magic
 constant instead of integer divide, which is very slow.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@21104 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPC64ISelPattern.cpp |   1 -
 lib/Target/PowerPC/PPCISelPattern.cpp   | 212 +++++++++++++++++++++---
 lib/Target/PowerPC/PPCInstrInfo.td      |   2 +
 3 files changed, 194 insertions(+), 21 deletions(-)
diff --git a/lib/Target/PowerPC/PPC64ISelPattern.cpp b/lib/Target/PowerPC/PPC64ISelPattern.cpp
index 5f5ba5c0ce6..cbff20f866d 100644
--- a/lib/Target/PowerPC/PPC64ISelPattern.cpp
+++ b/lib/Target/PowerPC/PPC64ISelPattern.cpp
@@ -1069,7 +1069,6 @@ unsigned ISel::SelectExpr(SDOperand N) {
     MVT::ValueType TypeBeingLoaded = (ISD::LOAD == opcode) ?
       Node->getValueType(0) : cast<MVTSDNode>(Node)->getExtraValueType();
     bool sext = (ISD::SEXTLOAD == opcode);
-    bool byte = (MVT::i8 == TypeBeingLoaded);
     
     // Make sure we generate both values.
     if (Result != 1)
diff --git a/lib/Target/PowerPC/PPCISelPattern.cpp b/lib/Target/PowerPC/PPCISelPattern.cpp
index dd1a4f331bf..ee0303c2186 100644
--- a/lib/Target/PowerPC/PPCISelPattern.cpp
+++ b/lib/Target/PowerPC/PPCISelPattern.cpp
@@ -8,6 +8,8 @@
 //===----------------------------------------------------------------------===//
 //
 // This file defines a pattern matching instruction selector for 32 bit PowerPC.
+// Magic number generation for integer divide from the PowerPC Compiler Writer's
+// Guide, section 3.2.3.5
 //
 //===----------------------------------------------------------------------===//
 
@@ -54,6 +56,10 @@ namespace {
       // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
       setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
       setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);
+      
+      // PowerPC has no SREM/UREM instructions
+      setOperationAction(ISD::SREM, MVT::i32, Expand);
+      setOperationAction(ISD::UREM, MVT::i32, Expand);
 
       setShiftAmountFlavor(Extend);   // shl X, 32 == 0
       addLegalFPImmediate(+0.0); // Necessary for FSEL
@@ -439,9 +445,10 @@ Statistic<>FusedFP("ppc-codegen", "Number of fused fp operations");
 /// SelectionDAG operations.
 //===--------------------------------------------------------------------===//
 class ISel : public SelectionDAGISel {
-  
-  /// Comment Here.
   PPC32TargetLowering PPC32Lowering;
+  SelectionDAG *ISelDAG;  // Hack to support us having a dag->dag transform
+                          // for sdiv and udiv until it is put into the future
+                          // dag combiner.
   
   /// ExprMap - As shared expressions are codegen'd, we keep track of which
   /// vreg the value is produced in, so we only emit one copy of each compiled
@@ -452,8 +459,8 @@ class ISel : public SelectionDAGISel {
   bool GlobalBaseInitialized;
   
 public:
-  ISel(TargetMachine &TM) : SelectionDAGISel(PPC32Lowering), PPC32Lowering(TM) 
-  {}
+  ISel(TargetMachine &TM) : SelectionDAGISel(PPC32Lowering), PPC32Lowering(TM),
+                            ISelDAG(0) {}
   
   /// runOnFunction - Override this function in order to reset our per-function
   /// variables.
@@ -468,11 +475,17 @@ public:
   virtual void InstructionSelectBasicBlock(SelectionDAG &DAG) {
     DEBUG(BB->dump());
     // Codegen the basic block.
+    ISelDAG = &DAG;
     Select(DAG.getRoot());
     
     // Clear state used for selection.
     ExprMap.clear();
+    ISelDAG = 0;
   }
+
+  // dag -> dag expanders for integer divide by constant
+  SDOperand BuildSDIVSequence(SDOperand N);
+  SDOperand BuildUDIVSequence(SDOperand N);
   
   unsigned getGlobalBaseReg();
   unsigned getConstDouble(double floatVal, unsigned Result);
@@ -504,7 +517,8 @@ static unsigned ExactLog2(unsigned Val) {
 /// of 1 indicates that the constant may be used in normal immediate form.  A
 /// return value of 2 indicates that the constant may be used in shifted
 /// immediate form.  A return value of 3 indicates that log base 2 of the
-/// constant may be used.
+/// constant may be used.  A return value of 4 indicates that the constant is
+/// suitable for conversion into a magic number for integer division.
 ///
 static unsigned getImmediateForOpcode(SDOperand N, unsigned Opcode,
                                       unsigned& Imm, bool U = false) {
@@ -534,6 +548,10 @@ static unsigned getImmediateForOpcode(SDOperand N, unsigned Opcode,
     break;
   case ISD::SDIV:
     if ((Imm = ExactLog2(v))) { return 3; }
+    if (v <= -2 || v >= 2) { return 4; }
+    break;
+  case ISD::UDIV:
+    if (v != 0) { return 4; }
     break;
   }
   return 0;
@@ -574,6 +592,156 @@ static unsigned IndexedOpForOp(unsigned Opcode) {
   }
   return 0;
 }
+
+// Structure used to return the necessary information to codegen an SDIV as 
+// a multiply.
+struct ms {
+  int m; // magic number
+  int s; // shift amount
+};
+
+struct mu {
+  unsigned int m; // magic number
+  int a;          // add indicator
+  int s;          // shift amount
+};
+
+/// magic - calculate the magic numbers required to codegen an integer sdiv as
+/// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1, 
+/// or -1.
+static struct ms magic(int d) {
+  int p;
+  unsigned int ad, anc, delta, q1, r1, q2, r2, t;
+  const unsigned int two31 = 2147483648U; // 2^31
+  struct ms mag;
+  
+  ad = abs(d);
+  t = two31 + ((unsigned int)d >> 31);
+  anc = t - 1 - t%ad;   // absolute value of nc
+  p = 31;               // initialize p
+  q1 = two31/anc;       // initialize q1 = 2p/abs(nc)
+  r1 = two31 - q1*anc;  // initialize r1 = rem(2p,abs(nc))
+  q2 = two31/ad;        // initialize q2 = 2p/abs(d)
+  r2 = two31 - q2*ad;   // initialize r2 = rem(2p,abs(d))
+  do {
+    p = p + 1;
+    q1 = 2*q1;        // update q1 = 2p/abs(nc)
+    r1 = 2*r1;        // update r1 = rem(2p/abs(nc))
+    if (r1 >= anc) {  // must be unsigned comparison
+      q1 = q1 + 1;
+      r1 = r1 - anc;
+    }
+    q2 = 2*q2;        // update q2 = 2p/abs(d)
+    r2 = 2*r2;        // update r2 = rem(2p/abs(d))
+    if (r2 >= ad) {   // must be unsigned comparison
+      q2 = q2 + 1;
+      r2 = r2 - ad;
+    }
+    delta = ad - r2;
+  } while (q1 < delta || (q1 == delta && r1 == 0));
+
+  mag.m = q2 + 1;
+  if (d < 0) mag.m = -mag.m; // resulting magic number
+  mag.s = p - 32;            // resulting shift
+  return mag;
+}
+
+/// magicu - calculate the magic numbers required to codegen an integer udiv as
+/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
+static struct mu magicu(unsigned d)
+{
+  int p;
+  unsigned int nc, delta, q1, r1, q2, r2;
+  struct mu magu;
+  magu.a = 0;               // initialize "add" indicator
+  nc = - 1 - (-d)%d;
+  p = 31;                   // initialize p
+  q1 = 0x80000000/nc;       // initialize q1 = 2p/nc
+  r1 = 0x80000000 - q1*nc;  // initialize r1 = rem(2p,nc)
+  q2 = 0x7FFFFFFF/d;        // initialize q2 = (2p-1)/d
+  r2 = 0x7FFFFFFF - q2*d;   // initialize r2 = rem((2p-1),d)
+  do {
+    p = p + 1;
+    if (r1 >= nc - r1 ) {
+      q1 = 2*q1 + 1;  // update q1
+      r1 = 2*r1 - nc; // update r1
+    }
+    else {
+      q1 = 2*q1; // update q1
+      r1 = 2*r1; // update r1
+    }
+    if (r2 + 1 >= d - r2) {
+      if (q2 >= 0x7FFFFFFF) magu.a = 1;
+      q2 = 2*q2 + 1;     // update q2
+      r2 = 2*r2 + 1 - d; // update r2
+    }
+    else {
+      if (q2 >= 0x80000000) magu.a = 1;
+      q2 = 2*q2;     // update q2
+      r2 = 2*r2 + 1; // update r2
+    }
+    delta = d - 1 - r2;
+  } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
+  magu.m = q2 + 1; // resulting magic number
+  magu.s = p - 32;  // resulting shift
+  return magu;
+}
+}
+
+/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand ISel::BuildSDIVSequence(SDOperand N) {
+  int d = (int)cast<ConstantSDNode>(N.getOperand(1))->getSignExtended();
+  ms magics = magic(d);
+  // Multiply the numerator (operand 0) by the magic value
+  SDOperand Q = ISelDAG->getNode(ISD::MULHS, MVT::i32, N.getOperand(0), 
+                                 ISelDAG->getConstant(magics.m, MVT::i32));
+  // If d > 0 and m < 0, add the numerator
+  if (d > 0 && magics.m < 0)
+    Q = ISelDAG->getNode(ISD::ADD, MVT::i32, Q, N.getOperand(0));
+  // If d < 0 and m > 0, subtract the numerator.
+  if (d < 0 && magics.m > 0)
+    Q = ISelDAG->getNode(ISD::SUB, MVT::i32, Q, N.getOperand(0));
+  // Shift right algebraic if shift value is nonzero
+  if (magics.s > 0)
+    Q = ISelDAG->getNode(ISD::SRA, MVT::i32, Q, 
+                         ISelDAG->getConstant(magics.s, MVT::i32));
+  // Extract the sign bit and add it to the quotient
+  SDOperand T = 
+    ISelDAG->getNode(ISD::SRL, MVT::i32, Q, ISelDAG->getConstant(31, MVT::i32));
+  Q = ISelDAG->getNode(ISD::ADD, MVT::i32, Q, T);
+  // Compute the remainder
+  T = ISelDAG->getNode(ISD::MUL, MVT::i32, Q, N.getOperand(1));
+  return ISelDAG->getNode(ISD::SUB, MVT::i32, N.getOperand(0), T);
+}
+
+/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand ISel::BuildUDIVSequence(SDOperand N) {
+  unsigned d = 
+    (unsigned)cast<ConstantSDNode>(N.getOperand(1))->getSignExtended();
+  mu magics = magicu(d);
+  // Multiply the numerator (operand 0) by the magic value
+  SDOperand Q = ISelDAG->getNode(ISD::MULHU, MVT::i32, N.getOperand(0), 
+                                 ISelDAG->getConstant(magics.m, MVT::i32));
+  if (magics.a == 0) {
+    Q = ISelDAG->getNode(ISD::SRL, MVT::i32, Q, 
+                         ISelDAG->getConstant(magics.s, MVT::i32));
+  } else {
+    SDOperand NPQ = ISelDAG->getNode(ISD::SUB, MVT::i32, N.getOperand(0), Q);
+    NPQ = ISelDAG->getNode(ISD::SRL, MVT::i32, NPQ, 
+                           ISelDAG->getConstant(1, MVT::i32));
+    NPQ = ISelDAG->getNode(ISD::ADD, MVT::i32, NPQ, Q);
+    Q = ISelDAG->getNode(ISD::SRL, MVT::i32, NPQ, 
+                           ISelDAG->getConstant(magics.s-1, MVT::i32));
+  }
+  // Compute the remainder
+  SDOperand T = ISelDAG->getNode(ISD::MUL, MVT::i32, Q, N.getOperand(1));
+  return ISelDAG->getNode(ISD::SUB, MVT::i32, N.getOperand(0), T);
 }
 
 /// getGlobalBaseReg - Output the instructions required to put the
@@ -1091,7 +1259,6 @@ unsigned ISel::SelectExpr(SDOperand N) {
     MVT::ValueType TypeBeingLoaded = (ISD::LOAD == opcode) ?
       Node->getValueType(0) : cast<MVTSDNode>(Node)->getExtraValueType();
     bool sext = (ISD::SEXTLOAD == opcode);
-    bool byte = (MVT::i8 == TypeBeingLoaded);
     
     // Make sure we generate both values.
     if (Result != 1)
@@ -1413,14 +1580,32 @@ unsigned ISel::SelectExpr(SDOperand N) {
     }
     return Result;
 
+  case ISD::MULHS:
+  case ISD::MULHU:
+    Tmp1 = SelectExpr(N.getOperand(0));
+    Tmp2 = SelectExpr(N.getOperand(1));
+    Opc = (ISD::MULHU == opcode) ? PPC::MULHWU : PPC::MULHW;
+    BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
+    return Result;
+
   case ISD::SDIV:
   case ISD::UDIV:
-    if (3 == getImmediateForOpcode(N.getOperand(1), opcode, Tmp3)) {
+    switch (getImmediateForOpcode(N.getOperand(1), opcode, Tmp3)) {
+    default: break;
+    // If this is an sdiv by a power of two, we can use an srawi/addze pair.
+    case 3:
       Tmp1 = MakeReg(MVT::i32);
       Tmp2 = SelectExpr(N.getOperand(0));
       BuildMI(BB, PPC::SRAWI, 2, Tmp1).addReg(Tmp2).addImm(Tmp3);
       BuildMI(BB, PPC::ADDZE, 1, Result).addReg(Tmp1);
       return Result;
+    // If this is a divide by constant, we can emit code using some magic
+    // constants to implement it as a multiply instead.
+    case 4:
+      if (opcode == ISD::SDIV)
+        return SelectExpr(BuildSDIVSequence(N));
+      else
+        return SelectExpr(BuildUDIVSequence(N));
     }
     Tmp1 = SelectExpr(N.getOperand(0));
     Tmp2 = SelectExpr(N.getOperand(1));
@@ -1428,19 +1613,6 @@ unsigned ISel::SelectExpr(SDOperand N) {
     BuildMI(BB, Opc, 2, Result).addReg(Tmp1).addReg(Tmp2);
     return Result;
 
-  case ISD::UREM:
-  case ISD::SREM: {
-    Tmp1 = SelectExpr(N.getOperand(0));
-    Tmp2 = SelectExpr(N.getOperand(1));
-    Tmp3 = MakeReg(MVT::i32);
-    unsigned Tmp4 = MakeReg(MVT::i32);
-    Opc = (ISD::UREM == opcode) ? PPC::DIVWU : PPC::DIVW;
-    BuildMI(BB, Opc, 2, Tmp3).addReg(Tmp1).addReg(Tmp2);
-    BuildMI(BB, PPC::MULLW, 2, Tmp4).addReg(Tmp3).addReg(Tmp2);
-    BuildMI(BB, PPC::SUBF, 2, Result).addReg(Tmp4).addReg(Tmp1);
-    return Result;
-  }
-
   case ISD::ADD_PARTS:
   case ISD::SUB_PARTS: {
     assert(N.getNumOperands() == 4 && N.getValueType() == MVT::i32 &&
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index a75eefc5618..747bbdeb926 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -368,6 +368,8 @@ def DIVW  : XOForm_1<31, 491, 0, 0, 0, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
                      "divw $rT, $rA, $rB">;
 def DIVWU : XOForm_1<31, 459, 0, 0, 0, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
                      "divwu $rT, $rA, $rB">;
+def MULHW : XOForm_1<31, 75, 0, 0, 0, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
+                     "mulhw $rT, $rA, $rB">;
 def MULHWU : XOForm_1<31, 11, 0, 0, 0, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
                      "mulhwu $rT, $rA, $rB">;
 def MULLD : XOForm_1<31, 233, 0, 0, 1, 0, (ops GPRC:$rT, GPRC:$rA, GPRC:$rB),
-- 
2.34.1