From 201c9776bd4197569b71fef0519f98a28d1db989 Mon Sep 17 00:00:00 2001
From: Eli Friedman <eli.friedman@gmail.com>
Date: Sun, 30 Nov 2008 06:02:26 +0000
Subject: [PATCH] Fix for PR2164: allow transforming arbitrary-width unsigned
 divides into multiplies.

Some more cleverness would be nice, though. It would be nice if we
could do this transformation on illegal types.  Also, we would
prefer a narrower constant when possible so that we can use a narrower
multiply, which can be cheaper.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60283 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/TargetLowering.cpp   | 160 +++++++-----------
 .../X86/2008-11-29-DivideConstant16bit.ll     |  10 ++
 test/CodeGen/X86/urem-i8-constant.ll          |   2 +-
 3 files changed, 76 insertions(+), 96 deletions(-)
 create mode 100644 test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll

diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4d8b9be44fb..ed29f134637 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2221,19 +2221,64 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
   return true;
 }
 
-// Magic for divide replacement
+struct mu {
+  APInt m;     // magic number
+  bool a;      // add indicator
+  uint32_t s;  // shift amount
+};
+
+/// magicu - calculate the magic numbers required to codegen an integer udiv as
+/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
+static mu magicu(const APInt& d) {
+  unsigned p;
+  APInt nc, delta, q1, r1, q2, r2;
+  struct mu magu;
+  magu.a = 0;               // initialize "add" indicator
+  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
+  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
+  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
+
+  nc = allOnes - (-d).urem(d);
+  p = d.getBitWidth() - 1;  // initialize p
+  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
+  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
+  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
+  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
+  do {
+    p = p + 1;
+    if (r1.uge(nc - r1)) {
+      q1 = q1 + q1 + 1;  // update q1
+      r1 = r1 + r1 - nc; // update r1
+    }
+    else {
+      q1 = q1+q1; // update q1
+      r1 = r1+r1; // update r1
+    }
+    if ((r2 + 1).uge(d - r2)) {
+      if (q2.uge(signedMax)) magu.a = 1;
+      q2 = q2+q2 + 1;     // update q2
+      r2 = r2+r2 + 1 - d; // update r2
+    }
+    else {
+      if (q2.uge(signedMin)) magu.a = 1;
+      q2 = q2+q2;     // update q2
+      r2 = r2+r2 + 1; // update r2
+    }
+    delta = d - 1 - r2;
+  } while (p < d.getBitWidth()*2 &&
+           (q1.ult(delta) || (q1 == delta && r1 == 0)));
+  magu.m = q2 + 1; // resulting magic number
+  magu.s = p - d.getBitWidth();  // resulting shift
+  return magu;
+}
 
+// Magic for divide replacement
+// FIXME: This should be APInt'ified
 struct ms {
   int64_t m;  // magic number
   int64_t s;  // shift amount
 };
 
-struct mu {
-  uint64_t m; // magic number
-  int64_t a;  // add indicator
-  int64_t s;  // shift amount
-};
-
 /// magic - calculate the magic numbers required to codegen an integer sdiv as
 /// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
 /// or -1.
@@ -2274,46 +2319,6 @@ static ms magic32(int32_t d) {
   return mag;
 }
 
-/// magicu - calculate the magic numbers required to codegen an integer udiv as
-/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
-static mu magicu32(uint32_t d) {
-  int32_t p;
-  uint32_t nc, delta, q1, r1, q2, r2;
-  struct mu magu;
-  magu.a = 0;               // initialize "add" indicator
-  nc = - 1 - (-d)%d;
-  p = 31;                   // initialize p
-  q1 = 0x80000000/nc;       // initialize q1 = 2p/nc
-  r1 = 0x80000000 - q1*nc;  // initialize r1 = rem(2p,nc)
-  q2 = 0x7FFFFFFF/d;        // initialize q2 = (2p-1)/d
-  r2 = 0x7FFFFFFF - q2*d;   // initialize r2 = rem((2p-1),d)
-  do {
-    p = p + 1;
-    if (r1 >= nc - r1 ) {
-      q1 = 2*q1 + 1;  // update q1
-      r1 = 2*r1 - nc; // update r1
-    }
-    else {
-      q1 = 2*q1; // update q1
-      r1 = 2*r1; // update r1
-    }
-    if (r2 + 1 >= d - r2) {
-      if (q2 >= 0x7FFFFFFF) magu.a = 1;
-      q2 = 2*q2 + 1;     // update q2
-      r2 = 2*r2 + 1 - d; // update r2
-    }
-    else {
-      if (q2 >= 0x80000000) magu.a = 1;
-      q2 = 2*q2;     // update q2
-      r2 = 2*r2 + 1; // update r2
-    }
-    delta = d - 1 - r2;
-  } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
-  magu.m = q2 + 1; // resulting magic number
-  magu.s = p - 32;  // resulting shift
-  return magu;
-}
-
 /// magic - calculate the magic numbers required to codegen an integer sdiv as
 /// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
 /// or -1.
@@ -2354,47 +2359,6 @@ static ms magic64(int64_t d) {
   return mag;
 }
 
-/// magicu - calculate the magic numbers required to codegen an integer udiv as
-/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
-static mu magicu64(uint64_t d)
-{
-  int64_t p;
-  uint64_t nc, delta, q1, r1, q2, r2;
-  struct mu magu;
-  magu.a = 0;               // initialize "add" indicator
-  nc = - 1 - (-d)%d;
-  p = 63;                   // initialize p
-  q1 = 0x8000000000000000ull/nc;       // initialize q1 = 2p/nc
-  r1 = 0x8000000000000000ull - q1*nc;  // initialize r1 = rem(2p,nc)
-  q2 = 0x7FFFFFFFFFFFFFFFull/d;        // initialize q2 = (2p-1)/d
-  r2 = 0x7FFFFFFFFFFFFFFFull - q2*d;   // initialize r2 = rem((2p-1),d)
-  do {
-    p = p + 1;
-    if (r1 >= nc - r1 ) {
-      q1 = 2*q1 + 1;  // update q1
-      r1 = 2*r1 - nc; // update r1
-    }
-    else {
-      q1 = 2*q1; // update q1
-      r1 = 2*r1; // update r1
-    }
-    if (r2 + 1 >= d - r2) {
-      if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
-      q2 = 2*q2 + 1;     // update q2
-      r2 = 2*r2 + 1 - d; // update r2
-    }
-    else {
-      if (q2 >= 0x8000000000000000ull) magu.a = 1;
-      q2 = 2*q2;     // update q2
-      r2 = 2*r2 + 1; // update r2
-    }
-    delta = d - 1 - r2;
-  } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
-  magu.m = q2 + 1; // resulting magic number
-  magu.s = p - 64;  // resulting shift
-  return magu;
-}
-
 /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
 /// return a DAG expression to select that will generate the same value by
 /// multiplying by a magic number.  See:
@@ -2456,15 +2420,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                   std::vector<SDNode*>* Created) const {
   MVT VT = N->getValueType(0);
-  
+
   // Check to see if we can do this.
-  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
-    return SDValue();       // BuildUDIV only operates on i32 or i64
-  
-  uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
-  mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);
-  
+  // FIXME: We should be more aggressive here.
+  if (!isTypeLegal(VT))
+    return SDValue();
+
+  // FIXME: We should use a narrower constant when the upper
+  // bits are known to be zero.
+  ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+  mu magics = magicu(N1C->getAPIntValue());
+
   // Multiply the numerator (operand 0) by the magic value
+  // FIXME: We should support doing a MUL in a wider type
   SDValue Q;
   if (isOperationLegal(ISD::MULHU, VT))
     Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
@@ -2479,6 +2447,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
     Created->push_back(Q.getNode());
 
   if (magics.a == 0) {
+    assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+           "We shouldn't generate an undefined shift!");
     return DAG.getNode(ISD::SRL, VT, Q, 
                        DAG.getConstant(magics.s, getShiftAmountTy()));
   } else {
diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
new file mode 100644
index 00000000000..fe1870e1d84
--- /dev/null
+++ b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll
@@ -0,0 +1,10 @@
+; RUN:  llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep 63551 | count 1
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define zeroext i16 @a(i16 zeroext %x) nounwind {
+entry:
+	%div = udiv i16 %x, 33		; <i32> [#uses=1]
+	ret i16 %div
+}
diff --git a/test/CodeGen/X86/urem-i8-constant.ll b/test/CodeGen/X86/urem-i8-constant.ll
index 8a433334f62..bc93684877b 100644
--- a/test/CodeGen/X86/urem-i8-constant.ll
+++ b/test/CodeGen/X86/urem-i8-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | not grep mul
+; RUN: llvm-as < %s | llc -march=x86 | grep 111
 
 define i8 @foo(i8 %tmp325) {
 	%t546 = urem i8 %tmp325, 37
-- 
2.34.1