From 201c9776bd4197569b71fef0519f98a28d1db989 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Sun, 30 Nov 2008 06:02:26 +0000 Subject: [PATCH] Fix for PR2164: allow transforming arbitrary-width unsigned divides into multiplies. Some more cleverness would be nice, though. It would be nice if we could do this transformation on illegal types. Also, we would prefer a narrower constant when possible so that we can use a narrower multiply, which can be cheaper. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60283 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 160 +++++++----------- .../X86/2008-11-29-DivideConstant16bit.ll | 10 ++ test/CodeGen/X86/urem-i8-constant.ll | 2 +- 3 files changed, 76 insertions(+), 96 deletions(-) create mode 100644 test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4d8b9be44fb..ed29f134637 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2221,19 +2221,64 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } -// Magic for divide replacement +struct mu { + APInt m; // magic number + bool a; // add indicator + uint32_t s; // shift amount +}; + +/// magicu - calculate the magic numbers required to codegen an integer udiv as +/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. +static mu magicu(const APInt& d) { + unsigned p; + APInt nc, delta, q1, r1, q2, r2; + struct mu magu; + magu.a = 0; // initialize "add" indicator + APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()); + APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); + APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); + + nc = allOnes - (-d).urem(d); + p = d.getBitWidth() - 1; // initialize p + q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc + r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) + q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d + r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) + do { + p = p + 1; + if (r1.uge(nc - r1)) { + q1 = q1 + q1 + 1; // update q1 + r1 = r1 + r1 - nc; // update r1 + } + else { + q1 = q1+q1; // update q1 + r1 = r1+r1; // update r1 + } + if ((r2 + 1).uge(d - r2)) { + if (q2.uge(signedMax)) magu.a = 1; + q2 = q2+q2 + 1; // update q2 + r2 = r2+r2 + 1 - d; // update r2 + } + else { + if (q2.uge(signedMin)) magu.a = 1; + q2 = q2+q2; // update q2 + r2 = r2+r2 + 1; // update r2 + } + delta = d - 1 - r2; + } while (p < d.getBitWidth()*2 && + (q1.ult(delta) || (q1 == delta && r1 == 0))); + magu.m = q2 + 1; // resulting magic number + magu.s = p - d.getBitWidth(); // resulting shift + return magu; +} +// Magic for divide replacement +// FIXME: This should be APInt'ified struct ms { int64_t m; // magic number int64_t s; // shift amount }; -struct mu { - uint64_t m; // magic number - int64_t a; // add indicator - int64_t s; // shift amount -}; - /// magic - calculate the magic numbers required to codegen an integer sdiv as /// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, /// or -1. @@ -2274,46 +2319,6 @@ static ms magic32(int32_t d) { return mag; } -/// magicu - calculate the magic numbers required to codegen an integer udiv as -/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. -static mu magicu32(uint32_t d) { - int32_t p; - uint32_t nc, delta, q1, r1, q2, r2; - struct mu magu; - magu.a = 0; // initialize "add" indicator - nc = - 1 - (-d)%d; - p = 31; // initialize p - q1 = 0x80000000/nc; // initialize q1 = 2p/nc - r1 = 0x80000000 - q1*nc; // initialize r1 = rem(2p,nc) - q2 = 0x7FFFFFFF/d; // initialize q2 = (2p-1)/d - r2 = 0x7FFFFFFF - q2*d; // initialize r2 = rem((2p-1),d) - do { - p = p + 1; - if (r1 >= nc - r1 ) { - q1 = 2*q1 + 1; // update q1 - r1 = 2*r1 - nc; // update r1 - } - else { - q1 = 2*q1; // update q1 - r1 = 2*r1; // update r1 - } - if (r2 + 1 >= d - r2) { - if (q2 >= 0x7FFFFFFF) magu.a = 1; - q2 = 2*q2 + 1; // update q2 - r2 = 2*r2 + 1 - d; // update r2 - } - else { - if (q2 >= 0x80000000) magu.a = 1; - q2 = 2*q2; // update q2 - r2 = 2*r2 + 1; // update r2 - } - delta = d - 1 - r2; - } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0))); - magu.m = q2 + 1; // resulting magic number - magu.s = p - 32; // resulting shift - return magu; -} - /// magic - calculate the magic numbers required to codegen an integer sdiv as /// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, /// or -1. @@ -2354,47 +2359,6 @@ static ms magic64(int64_t d) { return mag; } -/// magicu - calculate the magic numbers required to codegen an integer udiv as -/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. -static mu magicu64(uint64_t d) -{ - int64_t p; - uint64_t nc, delta, q1, r1, q2, r2; - struct mu magu; - magu.a = 0; // initialize "add" indicator - nc = - 1 - (-d)%d; - p = 63; // initialize p - q1 = 0x8000000000000000ull/nc; // initialize q1 = 2p/nc - r1 = 0x8000000000000000ull - q1*nc; // initialize r1 = rem(2p,nc) - q2 = 0x7FFFFFFFFFFFFFFFull/d; // initialize q2 = (2p-1)/d - r2 = 0x7FFFFFFFFFFFFFFFull - q2*d; // initialize r2 = rem((2p-1),d) - do { - p = p + 1; - if (r1 >= nc - r1 ) { - q1 = 2*q1 + 1; // update q1 - r1 = 2*r1 - nc; // update r1 - } - else { - q1 = 2*q1; // update q1 - r1 = 2*r1; // update r1 - } - if (r2 + 1 >= d - r2) { - if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1; - q2 = 2*q2 + 1; // update q2 - r2 = 2*r2 + 1 - d; // update r2 - } - else { - if (q2 >= 0x8000000000000000ull) magu.a = 1; - q2 = 2*q2; // update q2 - r2 = 2*r2 + 1; // update r2 - } - delta = d - 1 - r2; - } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0))); - magu.m = q2 + 1; // resulting magic number - magu.s = p - 64; // resulting shift - return magu; -} - /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: @@ -2456,15 +2420,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, std::vector* Created) const { MVT VT = N->getValueType(0); - + // Check to see if we can do this. - if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64)) - return SDValue(); // BuildUDIV only operates on i32 or i64 - - uint64_t d = cast(N->getOperand(1))->getZExtValue(); - mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d); - + // FIXME: We should be more aggressive here. + if (!isTypeLegal(VT)) + return SDValue(); + + // FIXME: We should use a narrower constant when the upper + // bits are known to be zero. + ConstantSDNode *N1C = cast(N->getOperand(1)); + mu magics = magicu(N1C->getAPIntValue()); + // Multiply the numerator (operand 0) by the magic value + // FIXME: We should support doing a MUL in a wider type SDValue Q; if (isOperationLegal(ISD::MULHU, VT)) Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0), @@ -2479,6 +2447,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, Created->push_back(Q.getNode()); if (magics.a == 0) { + assert(magics.s < N1C->getAPIntValue().getBitWidth() && + "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy())); } else { diff --git a/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll new file mode 100644 index 00000000000..fe1870e1d84 --- /dev/null +++ b/test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll @@ -0,0 +1,10 @@ +; RUN: llvm-as < %s | llc -mtriple=i686-pc-linux-gnu | grep 63551 | count 1 +; ModuleID = '' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i686-pc-linux-gnu" + +define zeroext i16 @a(i16 zeroext %x) nounwind { +entry: + %div = udiv i16 %x, 33 ; [#uses=1] + ret i16 %div +} diff --git a/test/CodeGen/X86/urem-i8-constant.ll b/test/CodeGen/X86/urem-i8-constant.ll index 8a433334f62..bc93684877b 100644 --- a/test/CodeGen/X86/urem-i8-constant.ll +++ b/test/CodeGen/X86/urem-i8-constant.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=x86 | not grep mul +; RUN: llvm-as < %s | llc -march=x86 | grep 111 define i8 @foo(i8 %tmp325) { %t546 = urem i8 %tmp325, 37 -- 2.34.1