Define AVX 128-bit pattern versions of SET0PS/PD.

[oota-llvm.git] / lib / Support / APInt.cpp
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp

index 9d1468493d5b4c577de1025146b466466225630d..262fa42ab2ced3988c44a5043aba7a268bd82c55 100644 (file)
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -102,7 +102,7 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
    clearUnusedBits();
  }
  
-APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix)
+APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
    : BitWidth(numbits), VAL(0) {
    assert(BitWidth && "Bitwidth too small");
    fromString(numbits, Str, radix);
@@ -273,7 +273,7 @@ APInt& APInt::operator-=(const APInt& RHS) {
    return clearUnusedBits();
  }
  
-/// Multiplies an integer array, x by a a uint64_t integer and places the result
+/// Multiplies an integer array, x, by a uint64_t integer and places the result
  /// into dest.
  /// @returns the carry out of the multiplication.
  /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
@@ -613,7 +613,7 @@ APInt& APInt::flip(unsigned bitPosition) {
    return *this;
  }
  
-unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) {
+unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
    assert(!str.empty() && "Invalid string length");
    assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
           "Radix should be 2, 8, 10, or 16!");
@@ -702,15 +702,14 @@ static inline uint32_t hashword(const uint64_t *k64, size_t length)
    a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
  
    /*------------------------------------------------- handle most of the key */
-  while (length > 3)
-    {
-      a += k[0];
-      b += k[1];
-      c += k[2];
-      mix(a,b,c);
-      length -= 3;
-      k += 3;
-    }
+  while (length > 3) {
+    a += k[0];
+    b += k[1];
+    c += k[2];
+    mix(a,b,c);
+    length -= 3;
+    k += 3;
+  }
  
    /*------------------------------------------- handle the last 3 uint32_t's */
    switch (length) {                  /* all the case statements fall through */
@@ -767,8 +766,23 @@ bool APInt::isPowerOf2() const {
  }
  
  unsigned APInt::countLeadingZerosSlowCase() const {
-  unsigned Count = 0;
-  for (unsigned i = getNumWords(); i > 0u; --i) {
+  // Treat the most significand word differently because it might have
+  // meaningless bits set beyond the precision.
+  unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
+  integerPart MSWMask;
+  if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
+  else {
+    MSWMask = ~integerPart(0);
+    BitsInMSW = APINT_BITS_PER_WORD;
+  }
+
+  unsigned i = getNumWords();
+  integerPart MSW = pVal[i-1] & MSWMask;
+  if (MSW)
+    return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
+
+  unsigned Count = BitsInMSW;
+  for (--i; i > 0u; --i) {
      if (pVal[i-1] == 0)
        Count += APINT_BITS_PER_WORD;
      else {
@@ -776,10 +790,7 @@ unsigned APInt::countLeadingZerosSlowCase() const {
        break;
      }
    }
-  unsigned remainder = BitWidth % APINT_BITS_PER_WORD;
-  if (remainder)
-    Count -= APINT_BITS_PER_WORD - remainder;
-  return std::min(Count, BitWidth);
+  return Count;
  }
  
  static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
@@ -1371,13 +1382,12 @@ APInt APInt::sqrt() const {
    // libc sqrt function which will probably use a hardware sqrt computation.
    // This should be faster than the algorithm below.
    if (magnitude < 52) {
-#ifdef _MSC_VER
-    // Amazingly, VC++ doesn't have round().
+#if HAVE_ROUND
      return APInt(BitWidth,
-                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
+                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
  #else
      return APInt(BitWidth,
-                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
+                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
  #endif
    }
  
@@ -1754,7 +1764,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
  
    // First, compose the values into an array of 32-bit words instead of
    // 64-bit words. This is a necessity of both the "short division" algorithm
-  // and the the Knuth "classical algorithm" which requires there to be native
+  // and the Knuth "classical algorithm" which requires there to be native
    // operations for +, -, and * on an m bit value with an m*2 bit result. We
    // can't use 64-bit operands here because we don't have native results of
    // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
@@ -2036,7 +2046,7 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
    divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
  }
  
-void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) {
+void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
    // Check our assumptions here
    assert(!str.empty() && "Invalid string length");
    assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
@@ -2053,8 +2063,8 @@ void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) {
    assert((slen <= numbits || radix != 2) && "Insufficient bit width");
    assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
    assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
-  assert((((slen-1)*64)/22 <= numbits || radix != 10)
-         && "Insufficient bit width");
+  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
+         "Insufficient bit width");
  
    // Allocate memory
    if (!isSingleWord())
@@ -2217,7 +2227,7 @@ namespace {
    static inline integerPart
    lowBitMask(unsigned int bits)
    {
-    assert (bits != 0 && bits <= integerPartWidth);
+    assert(bits != 0 && bits <= integerPartWidth);
  
      return ~(integerPart) 0 >> (integerPartWidth - bits);
    }
@@ -2294,10 +2304,10 @@ APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
  {
    unsigned int i;
  
-  assert (parts > 0);
+  assert(parts > 0);
  
    dst[0] = part;
-  for(i = 1; i < parts; i++)
+  for (i = 1; i < parts; i++)
      dst[i] = 0;
  }
  
@@ -2307,7 +2317,7 @@ APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
  {
    unsigned int i;
  
-  for(i = 0; i < parts; i++)
+  for (i = 0; i < parts; i++)
      dst[i] = src[i];
  }
  
@@ -2317,7 +2327,7 @@ APInt::tcIsZero(const integerPart *src, unsigned int parts)
  {
    unsigned int i;
  
-  for(i = 0; i < parts; i++)
+  for (i = 0; i < parts; i++)
      if (src[i])
        return false;
  
@@ -2328,17 +2338,25 @@ APInt::tcIsZero(const integerPart *src, unsigned int parts)
  int
  APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
  {
-  return(parts[bit / integerPartWidth]
-         & ((integerPart) 1 << bit % integerPartWidth)) != 0;
+  return (parts[bit / integerPartWidth] &
+          ((integerPart) 1 << bit % integerPartWidth)) != 0;
  }
  
-/* Set the given bit of a bignum.  */
+/* Set the given bit of a bignum. */
  void
  APInt::tcSetBit(integerPart *parts, unsigned int bit)
  {
    parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
  }
  
+/* Clears the given bit of a bignum. */
+void
+APInt::tcClearBit(integerPart *parts, unsigned int bit)
+{
+  parts[bit / integerPartWidth] &=
+    ~((integerPart) 1 << (bit % integerPartWidth));
+}
+
  /* Returns the bit number of the least significant set bit of a
     number.  If the input number has no bits set -1U is returned.  */
  unsigned int
@@ -2346,7 +2364,7 @@ APInt::tcLSB(const integerPart *parts, unsigned int n)
  {
    unsigned int i, lsb;
  
-  for(i = 0; i < n; i++) {
+  for (i = 0; i < n; i++) {
        if (parts[i] != 0) {
            lsb = partLSB(parts[i]);
  
@@ -2365,13 +2383,13 @@ APInt::tcMSB(const integerPart *parts, unsigned int n)
    unsigned int msb;
  
    do {
-      --n;
+    --n;
  
-      if (parts[n] != 0) {
-          msb = partMSB(parts[n]);
+    if (parts[n] != 0) {
+      msb = partMSB(parts[n]);
  
-          return msb + n * integerPartWidth;
-      }
+      return msb + n * integerPartWidth;
+    }
    } while (n);
  
    return -1U;
@@ -2388,7 +2406,7 @@ APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
    unsigned int firstSrcPart, dstParts, shift, n;
  
    dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
-  assert (dstParts <= dstCount);
+  assert(dstParts <= dstCount);
  
    firstSrcPart = srcLSB / integerPartWidth;
    tcAssign (dst, src + firstSrcPart, dstParts);
@@ -2423,7 +2441,7 @@ APInt::tcAdd(integerPart *dst, const integerPart *rhs,
  
    assert(c <= 1);
  
-  for(i = 0; i < parts; i++) {
+  for (i = 0; i < parts; i++) {
      integerPart l;
  
      l = dst[i];
@@ -2448,7 +2466,7 @@ APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
  
    assert(c <= 1);
  
-  for(i = 0; i < parts; i++) {
+  for (i = 0; i < parts; i++) {
      integerPart l;
  
      l = dst[i];
@@ -2498,7 +2516,7 @@ APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
    /* N loops; minimum of dstParts and srcParts.  */
    n = dstParts < srcParts ? dstParts: srcParts;
  
-  for(i = 0; i < n; i++) {
+  for (i = 0; i < n; i++) {
      integerPart low, mid, high, srcPart;
  
        /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
@@ -2563,7 +2581,7 @@ APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
         non-zero.  This is true if any remaining src parts are non-zero
         and the multiplier is non-zero.  */
      if (multiplier)
-      for(; i < srcParts; i++)
+      for (; i < srcParts; i++)
          if (src[i])
            return 1;
  
@@ -2588,7 +2606,7 @@ APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
    overflow = 0;
    tcSet(dst, 0, parts);
  
-  for(i = 0; i < parts; i++)
+  for (i = 0; i < parts; i++)
      overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
                                 parts - i, true);
  
@@ -2614,7 +2632,7 @@ APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
  
      tcSet(dst, 0, rhsParts);
  
-    for(n = 0; n < lhsParts; n++)
+    for (n = 0; n < lhsParts; n++)
        tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
  
      n = lhsParts + rhsParts;
@@ -2658,7 +2676,7 @@ APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
  
    /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
       the total.  */
-  for(;;) {
+  for (;;) {
        int compare;
  
        compare = tcCompare(remainder, srhs, parts);
@@ -2726,7 +2744,7 @@ APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
  
      /* Perform the shift.  This leaves the most significant COUNT bits
         of the result at zero.  */
-    for(i = 0; i < parts; i++) {
+    for (i = 0; i < parts; i++) {
        integerPart part;
  
        if (i + jump >= parts) {
@@ -2751,7 +2769,7 @@ APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
  {
    unsigned int i;
  
-  for(i = 0; i < parts; i++)
+  for (i = 0; i < parts; i++)
      dst[i] &= rhs[i];
  }
  
@@ -2761,7 +2779,7 @@ APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
  {
    unsigned int i;
  
-  for(i = 0; i < parts; i++)
+  for (i = 0; i < parts; i++)
      dst[i] |= rhs[i];
  }
  
@@ -2771,7 +2789,7 @@ APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
  {
    unsigned int i;
  
-  for(i = 0; i < parts; i++)
+  for (i = 0; i < parts; i++)
      dst[i] ^= rhs[i];
  }
  
@@ -2781,7 +2799,7 @@ APInt::tcComplement(integerPart *dst, unsigned int parts)
  {
    unsigned int i;
  
-  for(i = 0; i < parts; i++)
+  for (i = 0; i < parts; i++)
      dst[i] = ~dst[i];
  }
  
@@ -2810,7 +2828,7 @@ APInt::tcIncrement(integerPart *dst, unsigned int parts)
  {
    unsigned int i;
  
-  for(i = 0; i < parts; i++)
+  for (i = 0; i < parts; i++)
      if (++dst[i] != 0)
        break;