X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FSupport%2FAPFloat.cpp;h=8713ede6a17c4d12190b35f8c7579c6d4106b5ac;hb=55804a089e7ac26d5a07a9ac38e5dcedad3f2754;hp=128fee4f01fe8fa2ba3d463684a6e62b15619c26;hpb=88f2d6c167a575e5d9c62ecb211ff4d825b09ed3;p=oota-llvm.git diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 128fee4f01f..8713ede6a17 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -13,13 +13,25 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include +#include using namespace llvm; -#define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) +/// A macro used to combine two fcCategory enums into one key which can be used +/// in a switch statement to classify how the interaction of two APFloat's +/// categories affects an operation. +/// +/// TODO: If clang source code is ever allowed to use constexpr in its own +/// codebase, change this into a static inline function. +#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) /* Assumed in hexadecimal significand parsing, and conversion to hexadecimal strings. */ @@ -32,36 +44,42 @@ namespace llvm { struct fltSemantics { /* The largest E such that 2^E is representable; this matches the definition of IEEE 754. */ - exponent_t maxExponent; + APFloat::ExponentType maxExponent; /* The smallest E such that 2^E is a normalized number; this matches the definition of IEEE 754. */ - exponent_t minExponent; + APFloat::ExponentType minExponent; /* Number of bits in the significand. This includes the integer bit. */ unsigned int precision; - - /* True if arithmetic is supported. */ - unsigned int arithmeticOK; }; - const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; - const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; - const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; - const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true }; - const fltSemantics APFloat::Bogus = { 0, 0, 0, true }; - - // The PowerPC format consists of two doubles. It does not map cleanly - // onto the usual format above. For now only storage of constants of - // this type is supported, no arithmetic. - const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false }; + const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 }; + const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 }; + const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 }; + const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 }; + const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 }; + const fltSemantics APFloat::Bogus = { 0, 0, 0 }; + + /* The PowerPC format consists of two doubles. It does not map cleanly + onto the usual format above. It is approximated using twice the + mantissa bits. Note that for exponents near the double minimum, + we no longer can represent the full 106 mantissa bits, so those + will be treated as denormal numbers. + + FIXME: While this approximation is equivalent to what GCC uses for + compile-time arithmetic on PPC double-double numbers, it is not able + to represent all possible values held by a PPC double-double number, + for example: (long double) 1.0 + (long double) 0x1p-106 + Should this be replaced by a full emulation of PPC double-double? */ + const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 }; /* A tight upper bound on number of parts required to hold the value pow(5, power) is power * 815 / (351 * integerPartWidth) + 1 - + However, whilst the result may require only this many parts, because we are multiplying two values to get it, the multiplication may require an extra part with the excess part @@ -75,489 +93,482 @@ namespace llvm { / (351 * integerPartWidth)); } -/* Put a bunch of private, handy routines in an anonymous namespace. */ -namespace { - - static inline unsigned int - partCountForBits(unsigned int bits) - { - return ((bits) + integerPartWidth - 1) / integerPartWidth; - } - - /* Returns 0U-9U. Return values >= 10U are not digits. */ - static inline unsigned int - decDigitValue(unsigned int c) - { - return c - '0'; - } - - static unsigned int - hexDigitValue(unsigned int c) - { - unsigned int r; +/* A bunch of private, handy routines. */ - r = c - '0'; - if(r <= 9) - return r; +static inline unsigned int +partCountForBits(unsigned int bits) +{ + return ((bits) + integerPartWidth - 1) / integerPartWidth; +} - r = c - 'A'; - if(r <= 5) - return r + 10; +/* Returns 0U-9U. Return values >= 10U are not digits. */ +static inline unsigned int +decDigitValue(unsigned int c) +{ + return c - '0'; +} - r = c - 'a'; - if(r <= 5) - return r + 10; +/* Return the value of a decimal exponent of the form + [+-]ddddddd. - return -1U; - } + If the exponent overflows, returns a large exponent with the + appropriate sign. */ +static int +readExponent(StringRef::iterator begin, StringRef::iterator end) +{ + bool isNegative; + unsigned int absExponent; + const unsigned int overlargeExponent = 24000; /* FIXME. */ + StringRef::iterator p = begin; - static inline void - assertArithmeticOK(const llvm::fltSemantics &semantics) { - assert(semantics.arithmeticOK - && "Compile-time arithmetic does not support these semantics"); - } + assert(p != end && "Exponent has no digits"); - static inline bool - isArithmeticOk(const llvm::fltSemantics &semantics) { - return semantics.arithmeticOK; + isNegative = (*p == '-'); + if (*p == '-' || *p == '+') { + p++; + assert(p != end && "Exponent has no digits"); } - /* Return the value of a decimal exponent of the form - [+-]ddddddd. - - If the exponent overflows, returns a large exponent with the - appropriate sign. */ - static int - readExponent(const char *p) - { - bool isNegative; - unsigned int absExponent; - const unsigned int overlargeExponent = 24000; /* FIXME. */ + absExponent = decDigitValue(*p++); + assert(absExponent < 10U && "Invalid character in exponent"); - isNegative = (*p == '-'); - if (*p == '-' || *p == '+') - p++; + for (; p != end; ++p) { + unsigned int value; - absExponent = decDigitValue(*p++); - assert (absExponent < 10U); + value = decDigitValue(*p); + assert(value < 10U && "Invalid character in exponent"); - for (;;) { - unsigned int value; + value += absExponent * 10; + if (absExponent >= overlargeExponent) { + absExponent = overlargeExponent; + p = end; /* outwit assert below */ + break; + } + absExponent = value; + } - value = decDigitValue(*p); - if (value >= 10U) - break; + assert(p == end && "Invalid exponent in exponent"); - p++; - value += absExponent * 10; - if (absExponent >= overlargeExponent) { - absExponent = overlargeExponent; - break; - } - absExponent = value; - } + if (isNegative) + return -(int) absExponent; + else + return (int) absExponent; +} - if (isNegative) - return -(int) absExponent; - else - return (int) absExponent; - } +/* This is ugly and needs cleaning up, but I don't immediately see + how whilst remaining safe. */ +static int +totalExponent(StringRef::iterator p, StringRef::iterator end, + int exponentAdjustment) +{ + int unsignedExponent; + bool negative, overflow; + int exponent = 0; - /* This is ugly and needs cleaning up, but I don't immediately see - how whilst remaining safe. */ - static int - totalExponent(const char *p, int exponentAdjustment) - { - int unsignedExponent; - bool negative, overflow; - int exponent; + assert(p != end && "Exponent has no digits"); - /* Move past the exponent letter and sign to the digits. */ + negative = *p == '-'; + if (*p == '-' || *p == '+') { p++; - negative = *p == '-'; - if(*p == '-' || *p == '+') - p++; - - unsignedExponent = 0; - overflow = false; - for(;;) { - unsigned int value; + assert(p != end && "Exponent has no digits"); + } - value = decDigitValue(*p); - if(value >= 10U) - break; + unsignedExponent = 0; + overflow = false; + for (; p != end; ++p) { + unsigned int value; - p++; - unsignedExponent = unsignedExponent * 10 + value; - if(unsignedExponent > 65535) - overflow = true; - } + value = decDigitValue(*p); + assert(value < 10U && "Invalid character in exponent"); - if(exponentAdjustment > 65535 || exponentAdjustment < -65536) + unsignedExponent = unsignedExponent * 10 + value; + if (unsignedExponent > 32767) { overflow = true; - - if(!overflow) { - exponent = unsignedExponent; - if(negative) - exponent = -exponent; - exponent += exponentAdjustment; - if(exponent > 65535 || exponent < -65536) - overflow = true; + break; } + } - if(overflow) - exponent = negative ? -65536: 65535; + if (exponentAdjustment > 32767 || exponentAdjustment < -32768) + overflow = true; - return exponent; + if (!overflow) { + exponent = unsignedExponent; + if (negative) + exponent = -exponent; + exponent += exponentAdjustment; + if (exponent > 32767 || exponent < -32768) + overflow = true; } - static const char * - skipLeadingZeroesAndAnyDot(const char *p, const char **dot) - { - *dot = 0; - while(*p == '0') - p++; + if (overflow) + exponent = negative ? -32768: 32767; - if(*p == '.') { - *dot = p++; - while(*p == '0') - p++; - } + return exponent; +} - return p; - } +static StringRef::iterator +skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, + StringRef::iterator *dot) +{ + StringRef::iterator p = begin; + *dot = end; + while (*p == '0' && p != end) + p++; - /* Given a normal decimal floating point number of the form + if (*p == '.') { + *dot = p++; - dddd.dddd[eE][+-]ddd + assert(end - begin != 1 && "Significand has no digits"); - where the decimal point and exponent are optional, fill out the - structure D. Exponent is appropriate if the significand is - treated as an integer, and normalizedExponent if the significand - is taken to have the decimal point after a single leading - non-zero digit. + while (*p == '0' && p != end) + p++; + } - If the value is zero, V->firstSigDigit points to a non-digit, and - the return exponent is zero. - */ - struct decimalInfo { - const char *firstSigDigit; - const char *lastSigDigit; - int exponent; - int normalizedExponent; - }; + return p; +} - static void - interpretDecimal(const char *p, decimalInfo *D) - { - const char *dot; +/* Given a normal decimal floating point number of the form - p = skipLeadingZeroesAndAnyDot (p, &dot); + dddd.dddd[eE][+-]ddd - D->firstSigDigit = p; - D->exponent = 0; - D->normalizedExponent = 0; + where the decimal point and exponent are optional, fill out the + structure D. Exponent is appropriate if the significand is + treated as an integer, and normalizedExponent if the significand + is taken to have the decimal point after a single leading + non-zero digit. - for (;;) { - if (*p == '.') { - assert(dot == 0); - dot = p++; - } - if (decDigitValue(*p) >= 10U) + If the value is zero, V->firstSigDigit points to a non-digit, and + the return exponent is zero. +*/ +struct decimalInfo { + const char *firstSigDigit; + const char *lastSigDigit; + int exponent; + int normalizedExponent; +}; + +static void +interpretDecimal(StringRef::iterator begin, StringRef::iterator end, + decimalInfo *D) +{ + StringRef::iterator dot = end; + StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); + + D->firstSigDigit = p; + D->exponent = 0; + D->normalizedExponent = 0; + + for (; p != end; ++p) { + if (*p == '.') { + assert(dot == end && "String contains multiple dots"); + dot = p++; + if (p == end) break; - p++; } + if (decDigitValue(*p) >= 10U) + break; + } + + if (p != end) { + assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); + assert(p != begin && "Significand has no digits"); + assert((dot == end || p - begin != 1) && "Significand has no digits"); - /* If number is all zerooes accept any exponent. */ - if (p != D->firstSigDigit) { - if (*p == 'e' || *p == 'E') - D->exponent = readExponent(p + 1); + /* p points to the first non-digit in the string */ + D->exponent = readExponent(p + 1, end); - /* Implied decimal point? */ - if (!dot) - dot = p; + /* Implied decimal point? */ + if (dot == end) + dot = p; + } - /* Drop insignificant trailing zeroes. */ + /* If number is all zeroes accept any exponent. */ + if (p != D->firstSigDigit) { + /* Drop insignificant trailing zeroes. */ + if (p != begin) { do do p--; - while (*p == '0'); - while (*p == '.'); - - /* Adjust the exponents for any decimal point. */ - D->exponent += static_cast((dot - p) - (dot > p)); - D->normalizedExponent = (D->exponent + - static_cast((p - D->firstSigDigit) - - (dot > D->firstSigDigit && dot < p))); + while (p != begin && *p == '0'); + while (p != begin && *p == '.'); } - D->lastSigDigit = p; + /* Adjust the exponents for any decimal point. */ + D->exponent += static_cast((dot - p) - (dot > p)); + D->normalizedExponent = (D->exponent + + static_cast((p - D->firstSigDigit) + - (dot > D->firstSigDigit && dot < p))); } - /* Return the trailing fraction of a hexadecimal number. - DIGITVALUE is the first hex digit of the fraction, P points to - the next digit. */ - static lostFraction - trailingHexadecimalFraction(const char *p, unsigned int digitValue) - { - unsigned int hexDigit; + D->lastSigDigit = p; +} + +/* Return the trailing fraction of a hexadecimal number. + DIGITVALUE is the first hex digit of the fraction, P points to + the next digit. */ +static lostFraction +trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, + unsigned int digitValue) +{ + unsigned int hexDigit; - /* If the first trailing digit isn't 0 or 8 we can work out the - fraction immediately. */ - if(digitValue > 8) - return lfMoreThanHalf; - else if(digitValue < 8 && digitValue > 0) - return lfLessThanHalf; + /* If the first trailing digit isn't 0 or 8 we can work out the + fraction immediately. */ + if (digitValue > 8) + return lfMoreThanHalf; + else if (digitValue < 8 && digitValue > 0) + return lfLessThanHalf; - /* Otherwise we need to find the first non-zero digit. */ - while(*p == '0') - p++; + /* Otherwise we need to find the first non-zero digit. */ + while (*p == '0') + p++; - hexDigit = hexDigitValue(*p); + assert(p != end && "Invalid trailing hexadecimal fraction!"); - /* If we ran off the end it is exactly zero or one-half, otherwise - a little more. */ - if(hexDigit == -1U) - return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; - else - return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; - } + hexDigit = hexDigitValue(*p); - /* Return the fraction lost were a bignum truncated losing the least - significant BITS bits. */ - static lostFraction - lostFractionThroughTruncation(const integerPart *parts, - unsigned int partCount, - unsigned int bits) - { - unsigned int lsb; + /* If we ran off the end it is exactly zero or one-half, otherwise + a little more. */ + if (hexDigit == -1U) + return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; + else + return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; +} - lsb = APInt::tcLSB(parts, partCount); +/* Return the fraction lost were a bignum truncated losing the least + significant BITS bits. */ +static lostFraction +lostFractionThroughTruncation(const integerPart *parts, + unsigned int partCount, + unsigned int bits) +{ + unsigned int lsb; - /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ - if(bits <= lsb) - return lfExactlyZero; - if(bits == lsb + 1) - return lfExactlyHalf; - if(bits <= partCount * integerPartWidth - && APInt::tcExtractBit(parts, bits - 1)) - return lfMoreThanHalf; + lsb = APInt::tcLSB(parts, partCount); - return lfLessThanHalf; - } + /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ + if (bits <= lsb) + return lfExactlyZero; + if (bits == lsb + 1) + return lfExactlyHalf; + if (bits <= partCount * integerPartWidth && + APInt::tcExtractBit(parts, bits - 1)) + return lfMoreThanHalf; - /* Shift DST right BITS bits noting lost fraction. */ - static lostFraction - shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) - { - lostFraction lost_fraction; + return lfLessThanHalf; +} - lost_fraction = lostFractionThroughTruncation(dst, parts, bits); +/* Shift DST right BITS bits noting lost fraction. */ +static lostFraction +shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) +{ + lostFraction lost_fraction; - APInt::tcShiftRight(dst, parts, bits); + lost_fraction = lostFractionThroughTruncation(dst, parts, bits); - return lost_fraction; - } + APInt::tcShiftRight(dst, parts, bits); - /* Combine the effect of two lost fractions. */ - static lostFraction - combineLostFractions(lostFraction moreSignificant, - lostFraction lessSignificant) - { - if(lessSignificant != lfExactlyZero) { - if(moreSignificant == lfExactlyZero) - moreSignificant = lfLessThanHalf; - else if(moreSignificant == lfExactlyHalf) - moreSignificant = lfMoreThanHalf; - } + return lost_fraction; +} - return moreSignificant; +/* Combine the effect of two lost fractions. */ +static lostFraction +combineLostFractions(lostFraction moreSignificant, + lostFraction lessSignificant) +{ + if (lessSignificant != lfExactlyZero) { + if (moreSignificant == lfExactlyZero) + moreSignificant = lfLessThanHalf; + else if (moreSignificant == lfExactlyHalf) + moreSignificant = lfMoreThanHalf; } - /* The error from the true value, in half-ulps, on multiplying two - floating point numbers, which differ from the value they - approximate by at most HUE1 and HUE2 half-ulps, is strictly less - than the returned value. + return moreSignificant; +} - See "How to Read Floating Point Numbers Accurately" by William D - Clinger. */ - static unsigned int - HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) - { - assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); +/* The error from the true value, in half-ulps, on multiplying two + floating point numbers, which differ from the value they + approximate by at most HUE1 and HUE2 half-ulps, is strictly less + than the returned value. - if (HUerr1 + HUerr2 == 0) - return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ - else - return inexactMultiply + 2 * (HUerr1 + HUerr2); - } + See "How to Read Floating Point Numbers Accurately" by William D + Clinger. */ +static unsigned int +HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) +{ + assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); - /* The number of ulps from the boundary (zero, or half if ISNEAREST) - when the least significant BITS are truncated. BITS cannot be - zero. */ - static integerPart - ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) - { - unsigned int count, partBits; - integerPart part, boundary; + if (HUerr1 + HUerr2 == 0) + return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ + else + return inexactMultiply + 2 * (HUerr1 + HUerr2); +} - assert (bits != 0); +/* The number of ulps from the boundary (zero, or half if ISNEAREST) + when the least significant BITS are truncated. BITS cannot be + zero. */ +static integerPart +ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) +{ + unsigned int count, partBits; + integerPart part, boundary; - bits--; - count = bits / integerPartWidth; - partBits = bits % integerPartWidth + 1; + assert(bits != 0); - part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); + bits--; + count = bits / integerPartWidth; + partBits = bits % integerPartWidth + 1; - if (isNearest) - boundary = (integerPart) 1 << (partBits - 1); - else - boundary = 0; + part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); - if (count == 0) { - if (part - boundary <= boundary - part) - return part - boundary; - else - return boundary - part; - } + if (isNearest) + boundary = (integerPart) 1 << (partBits - 1); + else + boundary = 0; - if (part == boundary) { - while (--count) - if (parts[count]) - return ~(integerPart) 0; /* A lot. */ + if (count == 0) { + if (part - boundary <= boundary - part) + return part - boundary; + else + return boundary - part; + } - return parts[0]; - } else if (part == boundary - 1) { - while (--count) - if (~parts[count]) - return ~(integerPart) 0; /* A lot. */ + if (part == boundary) { + while (--count) + if (parts[count]) + return ~(integerPart) 0; /* A lot. */ - return -parts[0]; - } + return parts[0]; + } else if (part == boundary - 1) { + while (--count) + if (~parts[count]) + return ~(integerPart) 0; /* A lot. */ - return ~(integerPart) 0; /* A lot. */ + return -parts[0]; } - /* Place pow(5, power) in DST, and return the number of parts used. - DST must be at least one part larger than size of the answer. */ - static unsigned int - powerOf5(integerPart *dst, unsigned int power) - { - static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, - 15625, 78125 }; - static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 }; - static unsigned int partsCount[16] = { 1 }; - - integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; - unsigned int result; + return ~(integerPart) 0; /* A lot. */ +} - assert(power <= maxExponent); +/* Place pow(5, power) in DST, and return the number of parts used. + DST must be at least one part larger than size of the answer. */ +static unsigned int +powerOf5(integerPart *dst, unsigned int power) +{ + static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, + 15625, 78125 }; + integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; + pow5s[0] = 78125 * 5; - p1 = dst; - p2 = scratch; + unsigned int partsCount[16] = { 1 }; + integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; + unsigned int result; + assert(power <= maxExponent); - *p1 = firstEightPowers[power & 7]; - power >>= 3; + p1 = dst; + p2 = scratch; - result = 1; - pow5 = pow5s; + *p1 = firstEightPowers[power & 7]; + power >>= 3; - for (unsigned int n = 0; power; power >>= 1, n++) { - unsigned int pc; + result = 1; + pow5 = pow5s; - pc = partsCount[n]; + for (unsigned int n = 0; power; power >>= 1, n++) { + unsigned int pc; - /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ - if (pc == 0) { - pc = partsCount[n - 1]; - APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); - pc *= 2; - if (pow5[pc - 1] == 0) - pc--; - partsCount[n] = pc; - } + pc = partsCount[n]; - if (power & 1) { - integerPart *tmp; + /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ + if (pc == 0) { + pc = partsCount[n - 1]; + APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); + pc *= 2; + if (pow5[pc - 1] == 0) + pc--; + partsCount[n] = pc; + } - APInt::tcFullMultiply(p2, p1, pow5, result, pc); - result += pc; - if (p2[result - 1] == 0) - result--; + if (power & 1) { + integerPart *tmp; - /* Now result is in p1 with partsCount parts and p2 is scratch - space. */ - tmp = p1, p1 = p2, p2 = tmp; - } + APInt::tcFullMultiply(p2, p1, pow5, result, pc); + result += pc; + if (p2[result - 1] == 0) + result--; - pow5 += pc; + /* Now result is in p1 with partsCount parts and p2 is scratch + space. */ + tmp = p1, p1 = p2, p2 = tmp; } - if (p1 != dst) - APInt::tcAssign(dst, p1, result); - - return result; + pow5 += pc; } - /* Zero at the end to avoid modular arithmetic when adding one; used - when rounding up during hexadecimal output. */ - static const char hexDigitsLower[] = "0123456789abcdef0"; - static const char hexDigitsUpper[] = "0123456789ABCDEF0"; - static const char infinityL[] = "infinity"; - static const char infinityU[] = "INFINITY"; - static const char NaNL[] = "nan"; - static const char NaNU[] = "NAN"; + if (p1 != dst) + APInt::tcAssign(dst, p1, result); - /* Write out an integerPart in hexadecimal, starting with the most - significant nibble. Write out exactly COUNT hexdigits, return - COUNT. */ - static unsigned int - partAsHex (char *dst, integerPart part, unsigned int count, - const char *hexDigitChars) - { - unsigned int result = count; + return result; +} - assert (count != 0 && count <= integerPartWidth / 4); +/* Zero at the end to avoid modular arithmetic when adding one; used + when rounding up during hexadecimal output. */ +static const char hexDigitsLower[] = "0123456789abcdef0"; +static const char hexDigitsUpper[] = "0123456789ABCDEF0"; +static const char infinityL[] = "infinity"; +static const char infinityU[] = "INFINITY"; +static const char NaNL[] = "nan"; +static const char NaNU[] = "NAN"; - part >>= (integerPartWidth - 4 * count); - while (count--) { - dst[count] = hexDigitChars[part & 0xf]; - part >>= 4; - } +/* Write out an integerPart in hexadecimal, starting with the most + significant nibble. Write out exactly COUNT hexdigits, return + COUNT. */ +static unsigned int +partAsHex (char *dst, integerPart part, unsigned int count, + const char *hexDigitChars) +{ + unsigned int result = count; + + assert(count != 0 && count <= integerPartWidth / 4); - return result; + part >>= (integerPartWidth - 4 * count); + while (count--) { + dst[count] = hexDigitChars[part & 0xf]; + part >>= 4; } - /* Write out an unsigned decimal integer. */ - static char * - writeUnsignedDecimal (char *dst, unsigned int n) - { - char buff[40], *p; + return result; +} + +/* Write out an unsigned decimal integer. */ +static char * +writeUnsignedDecimal (char *dst, unsigned int n) +{ + char buff[40], *p; - p = buff; - do - *p++ = '0' + n % 10; - while (n /= 10); + p = buff; + do + *p++ = '0' + n % 10; + while (n /= 10); - do - *dst++ = *--p; - while (p != buff); + do + *dst++ = *--p; + while (p != buff); - return dst; - } + return dst; +} - /* Write out a signed decimal integer. */ - static char * - writeSignedDecimal (char *dst, int value) - { - if (value < 0) { - *dst++ = '-'; - dst = writeUnsignedDecimal(dst, -(unsigned) value); - } else - dst = writeUnsignedDecimal(dst, value); +/* Write out a signed decimal integer. */ +static char * +writeSignedDecimal (char *dst, int value) +{ + if (value < 0) { + *dst++ = '-'; + dst = writeUnsignedDecimal(dst, -(unsigned) value); + } else + dst = writeUnsignedDecimal(dst, value); - return dst; - } + return dst; } /* Constructors. */ @@ -568,14 +579,14 @@ APFloat::initialize(const fltSemantics *ourSemantics) semantics = ourSemantics; count = partCount(); - if(count > 1) + if (count > 1) significand.parts = new integerPart[count]; } void APFloat::freeSignificand() { - if(partCount() > 1) + if (needsCleanup()) delete [] significand.parts; } @@ -587,16 +598,14 @@ APFloat::assign(const APFloat &rhs) sign = rhs.sign; category = rhs.category; exponent = rhs.exponent; - sign2 = rhs.sign2; - exponent2 = rhs.exponent2; - if(category == fcNormal || category == fcNaN) + if (isFiniteNonZero() || category == fcNaN) copySignificand(rhs); } void APFloat::copySignificand(const APFloat &rhs) { - assert(category == fcNormal || category == fcNaN); + assert(isFiniteNonZero() || category == fcNaN); assert(rhs.partCount() >= partCount()); APInt::tcAssign(significandParts(), rhs.significandParts(), @@ -604,19 +613,67 @@ APFloat::copySignificand(const APFloat &rhs) } /* Make this number a NaN, with an arbitrary but deterministic value - for the significand. */ -void -APFloat::makeNaN(void) + for the significand. If double or longer, this is a signalling NaN, + which may not be ideal. If float, this is QNaN(0). */ +void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { category = fcNaN; - APInt::tcSet(significandParts(), ~0U, partCount()); + sign = Negative; + + integerPart *significand = significandParts(); + unsigned numParts = partCount(); + + // Set the significand bits to the fill. + if (!fill || fill->getNumWords() < numParts) + APInt::tcSet(significand, 0, numParts); + if (fill) { + APInt::tcAssign(significand, fill->getRawData(), + std::min(fill->getNumWords(), numParts)); + + // Zero out the excess bits of the significand. + unsigned bitsToPreserve = semantics->precision - 1; + unsigned part = bitsToPreserve / 64; + bitsToPreserve %= 64; + significand[part] &= ((1ULL << bitsToPreserve) - 1); + for (part++; part != numParts; ++part) + significand[part] = 0; + } + + unsigned QNaNBit = semantics->precision - 2; + + if (SNaN) { + // We always have to clear the QNaN bit to make it an SNaN. + APInt::tcClearBit(significand, QNaNBit); + + // If there are no bits set in the payload, we have to set + // *something* to make it a NaN instead of an infinity; + // conventionally, this is the next bit down from the QNaN bit. + if (APInt::tcIsZero(significand, numParts)) + APInt::tcSetBit(significand, QNaNBit - 1); + } else { + // We always have to set the QNaN bit to make it a QNaN. + APInt::tcSetBit(significand, QNaNBit); + } + + // For x87 extended precision, we want to make a NaN, not a + // pseudo-NaN. Maybe we should expose the ability to make + // pseudo-NaNs? + if (semantics == &APFloat::x87DoubleExtended) + APInt::tcSetBit(significand, QNaNBit + 1); +} + +APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, + const APInt *fill) { + APFloat value(Sem, uninitialized); + value.makeNaN(SNaN, Negative, fill); + return value; } APFloat & APFloat::operator=(const APFloat &rhs) { - if(this != &rhs) { - if(semantics != rhs.semantics) { + if (this != &rhs) { + if (semantics != rhs.semantics) { freeSignificand(); initialize(rhs.semantics); } @@ -626,6 +683,74 @@ APFloat::operator=(const APFloat &rhs) return *this; } +bool +APFloat::isDenormal() const { + return isFiniteNonZero() && (exponent == semantics->minExponent) && + (APInt::tcExtractBit(significandParts(), + semantics->precision - 1) == 0); +} + +bool +APFloat::isSmallest() const { + // The smallest number by magnitude in our format will be the smallest + // denormal, i.e. the floating point number with exponent being minimum + // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). + return isFiniteNonZero() && exponent == semantics->minExponent && + significandMSB() == 0; +} + +bool APFloat::isSignificandAllOnes() const { + // Test if the significand excluding the integral bit is all ones. This allows + // us to test for binade boundaries. + const integerPart *Parts = significandParts(); + const unsigned PartCount = partCount(); + for (unsigned i = 0; i < PartCount - 1; i++) + if (~Parts[i]) + return false; + + // Set the unused high bits to all ones when we compare. + const unsigned NumHighBits = + PartCount*integerPartWidth - semantics->precision + 1; + assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " + "fill than integerPartWidth"); + const integerPart HighBitFill = + ~integerPart(0) << (integerPartWidth - NumHighBits); + if (~(Parts[PartCount - 1] | HighBitFill)) + return false; + + return true; +} + +bool APFloat::isSignificandAllZeros() const { + // Test if the significand excluding the integral bit is all zeros. This + // allows us to test for binade boundaries. + const integerPart *Parts = significandParts(); + const unsigned PartCount = partCount(); + + for (unsigned i = 0; i < PartCount - 1; i++) + if (Parts[i]) + return false; + + const unsigned NumHighBits = + PartCount*integerPartWidth - semantics->precision + 1; + assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " + "clear than integerPartWidth"); + const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; + + if (Parts[PartCount - 1] & HighBitMask) + return false; + + return true; +} + +bool +APFloat::isLargest() const { + // The largest number by magnitude in our format will be the floating point + // number with maximum exponent and with significand that is all ones. + return isFiniteNonZero() && exponent == semantics->maxExponent + && isSignificandAllOnes(); +} + bool APFloat::bitwiseIsEqual(const APFloat &rhs) const { if (this == &rhs) @@ -634,15 +759,9 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { category != rhs.category || sign != rhs.sign) return false; - if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && - sign2 != rhs.sign2) - return false; if (category==fcZero || category==fcInfinity) return true; - else if (category==fcNormal && exponent!=rhs.exponent) - return false; - else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && - exponent2!=rhs.exponent2) + else if (isFiniteNonZero() && exponent!=rhs.exponent) return false; else { int i= partCount(); @@ -656,9 +775,7 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { } } -APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) -{ - assertArithmeticOK(ourSemantics); +APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { initialize(&ourSemantics); sign = 0; zeroSignificand(); @@ -667,28 +784,23 @@ APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) normalize(rmNearestTiesToEven, lfExactlyZero); } -APFloat::APFloat(const fltSemantics &ourSemantics, - fltCategory ourCategory, bool negative) -{ - assertArithmeticOK(ourSemantics); +APFloat::APFloat(const fltSemantics &ourSemantics) { initialize(&ourSemantics); - category = ourCategory; - sign = negative; - if(category == fcNormal) - category = fcZero; - else if (ourCategory == fcNaN) - makeNaN(); + category = fcZero; + sign = false; } -APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) -{ - assertArithmeticOK(ourSemantics); +APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { + // Allocates storage if necessary but does not initialize it. + initialize(&ourSemantics); +} + +APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { initialize(&ourSemantics); convertFromString(text, rmNearestTiesToEven); } -APFloat::APFloat(const APFloat &rhs) -{ +APFloat::APFloat(const APFloat &rhs) { initialize(rhs.semantics); assign(rhs); } @@ -724,9 +836,7 @@ APFloat::significandParts() const integerPart * APFloat::significandParts() { - assert(category == fcNormal || category == fcNaN); - - if(partCount() > 1) + if (partCount() > 1) return significand.parts; else return &significand.part; @@ -749,6 +859,7 @@ APFloat::incrementSignificand() /* Our callers should never cause us to overflow. */ assert(carry == 0); + (void)carry; } /* Add the significand of the RHS. Returns the carry flag. */ @@ -800,7 +911,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) precision = semantics->precision; newPartsCount = partCountForBits(precision * 2); - if(newPartsCount > 4) + if (newPartsCount > 4) fullSignificand = new integerPart[newPartsCount]; else fullSignificand = scratch; @@ -815,7 +926,21 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; exponent += rhs.exponent; - if(addend) { + // Assume the operands involved in the multiplication are single-precision + // FP, and the two multiplicants are: + // *this = a23 . a22 ... a0 * 2^e1 + // rhs = b23 . b22 ... b0 * 2^e2 + // the result of multiplication is: + // *this = c47 c46 . c45 ... c0 * 2^(e1+e2) + // Note that there are two significant bits at the left-hand side of the + // radix point. Move the radix point toward left by one bit, and adjust + // exponent accordingly. + exponent += 1; + + if (addend) { + // The intermediate result of the multiplication has "2 * precision" + // signicant bit; adjust the addend to be consistent with mul result. + // Significand savedSignificand = significand; const fltSemantics *savedSemantics = semantics; fltSemantics extendedSemantics; @@ -823,19 +948,19 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) unsigned int extendedPrecision; /* Normalize our MSB. */ - extendedPrecision = precision + precision - 1; - if(omsb != extendedPrecision) - { - APInt::tcShiftLeft(fullSignificand, newPartsCount, - extendedPrecision - omsb); - exponent -= extendedPrecision - omsb; - } + extendedPrecision = 2 * precision; + if (omsb != extendedPrecision) { + assert(extendedPrecision > omsb); + APInt::tcShiftLeft(fullSignificand, newPartsCount, + extendedPrecision - omsb); + exponent -= extendedPrecision - omsb; + } /* Create new semantics. */ extendedSemantics = *semantics; extendedSemantics.precision = extendedPrecision; - if(newPartsCount == 1) + if (newPartsCount == 1) significand.part = fullSignificand[0]; else significand.parts = fullSignificand; @@ -844,10 +969,11 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) APFloat extendedAddend(*addend); status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored); assert(status == opOK); + (void)status; lost_fraction = addOrSubtractSignificand(extendedAddend, false); /* Restore our state. */ - if(newPartsCount == 1) + if (newPartsCount == 1) fullSignificand[0] = significand.part; significand = savedSignificand; semantics = savedSemantics; @@ -855,9 +981,19 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; } - exponent -= (precision - 1); + // Convert the result having "2 * precision" significant-bits back to the one + // having "precision" significant-bits. First, move the radix point from + // poision "2*precision - 1" to "precision - 1". The exponent need to be + // adjusted by "2*precision - 1" - "precision - 1" = "precision". + exponent -= precision; - if(omsb > precision) { + // In case MSB resides at the left-hand side of radix point, shift the + // mantissa right by some amount to make sure the MSB reside right before + // the radix point (i.e. "MSB . rest-significant-bits"). + // + // Note that the result is not normalized when "omsb < precision". So, the + // caller needs to call APFloat::normalize() if normalized value is expected. + if (omsb > precision) { unsigned int bits, significantParts; lostFraction lf; @@ -870,7 +1006,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) APInt::tcAssign(lhsSignificand, fullSignificand, partsCount); - if(newPartsCount > 4) + if (newPartsCount > 4) delete [] fullSignificand; return lost_fraction; @@ -892,7 +1028,7 @@ APFloat::divideSignificand(const APFloat &rhs) rhsSignificand = rhs.significandParts(); partsCount = partCount(); - if(partsCount > 2) + if (partsCount > 2) dividend = new integerPart[partsCount * 2]; else dividend = scratch; @@ -900,7 +1036,7 @@ APFloat::divideSignificand(const APFloat &rhs) divisor = dividend + partsCount; /* Copy the dividend and divisor as they will be modified in-place. */ - for(i = 0; i < partsCount; i++) { + for (i = 0; i < partsCount; i++) { dividend[i] = lhsSignificand[i]; divisor[i] = rhsSignificand[i]; lhsSignificand[i] = 0; @@ -912,14 +1048,14 @@ APFloat::divideSignificand(const APFloat &rhs) /* Normalize the divisor. */ bit = precision - APInt::tcMSB(divisor, partsCount) - 1; - if(bit) { + if (bit) { exponent += bit; APInt::tcShiftLeft(divisor, partsCount, bit); } /* Normalize the dividend. */ bit = precision - APInt::tcMSB(dividend, partsCount) - 1; - if(bit) { + if (bit) { exponent -= bit; APInt::tcShiftLeft(dividend, partsCount, bit); } @@ -927,15 +1063,15 @@ APFloat::divideSignificand(const APFloat &rhs) /* Ensure the dividend >= divisor initially for the loop below. Incidentally, this means that the division loop below is guaranteed to set the integer bit to one. */ - if(APInt::tcCompare(dividend, divisor, partsCount) < 0) { + if (APInt::tcCompare(dividend, divisor, partsCount) < 0) { exponent--; APInt::tcShiftLeft(dividend, partsCount, 1); assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0); } /* Long division. */ - for(bit = precision; bit; bit -= 1) { - if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) { + for (bit = precision; bit; bit -= 1) { + if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) { APInt::tcSubtract(dividend, divisor, 0, partsCount); APInt::tcSetBit(lhsSignificand, bit - 1); } @@ -946,16 +1082,16 @@ APFloat::divideSignificand(const APFloat &rhs) /* Figure out the lost fraction. */ int cmp = APInt::tcCompare(dividend, divisor, partsCount); - if(cmp > 0) + if (cmp > 0) lost_fraction = lfMoreThanHalf; - else if(cmp == 0) + else if (cmp == 0) lost_fraction = lfExactlyHalf; - else if(APInt::tcIsZero(dividend, partsCount)) + else if (APInt::tcIsZero(dividend, partsCount)) lost_fraction = lfExactlyZero; else lost_fraction = lfLessThanHalf; - if(partsCount > 2) + if (partsCount > 2) delete [] dividend; return lost_fraction; @@ -978,7 +1114,7 @@ lostFraction APFloat::shiftSignificandRight(unsigned int bits) { /* Our exponent should not overflow. */ - assert((exponent_t) (exponent + bits) >= exponent); + assert((ExponentType) (exponent + bits) >= exponent); exponent += bits; @@ -991,7 +1127,7 @@ APFloat::shiftSignificandLeft(unsigned int bits) { assert(bits < semantics->precision); - if(bits) { + if (bits) { unsigned int partsCount = partCount(); APInt::tcShiftLeft(significandParts(), partsCount, bits); @@ -1007,20 +1143,20 @@ APFloat::compareAbsoluteValue(const APFloat &rhs) const int compare; assert(semantics == rhs.semantics); - assert(category == fcNormal); - assert(rhs.category == fcNormal); + assert(isFiniteNonZero()); + assert(rhs.isFiniteNonZero()); compare = exponent - rhs.exponent; /* If exponents are equal, do an unsigned bignum comparison of the significands. */ - if(compare == 0) + if (compare == 0) compare = APInt::tcCompare(significandParts(), rhs.significandParts(), partCount()); - if(compare > 0) + if (compare > 0) return cmpGreaterThan; - else if(compare < 0) + else if (compare < 0) return cmpLessThan; else return cmpEqual; @@ -1032,14 +1168,13 @@ APFloat::opStatus APFloat::handleOverflow(roundingMode rounding_mode) { /* Infinity? */ - if(rounding_mode == rmNearestTiesToEven - || rounding_mode == rmNearestTiesToAway - || (rounding_mode == rmTowardPositive && !sign) - || (rounding_mode == rmTowardNegative && sign)) - { - category = fcInfinity; - return (opStatus) (opOverflow | opInexact); - } + if (rounding_mode == rmNearestTiesToEven || + rounding_mode == rmNearestTiesToAway || + (rounding_mode == rmTowardPositive && !sign) || + (rounding_mode == rmTowardNegative && sign)) { + category = fcInfinity; + return (opStatus) (opOverflow | opInexact); + } /* Otherwise we become the largest finite number. */ category = fcNormal; @@ -1061,24 +1196,21 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode, unsigned int bit) const { /* NaNs and infinities should not have lost fractions. */ - assert(category == fcNormal || category == fcZero); + assert(isFiniteNonZero() || category == fcZero); /* Current callers never pass this so we don't handle it. */ assert(lost_fraction != lfExactlyZero); - switch(rounding_mode) { - default: - assert(0); - + switch (rounding_mode) { case rmNearestTiesToAway: return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; case rmNearestTiesToEven: - if(lost_fraction == lfMoreThanHalf) + if (lost_fraction == lfMoreThanHalf) return true; /* Our zeroes don't have a significand to test. */ - if(lost_fraction == lfExactlyHalf && category != fcZero) + if (lost_fraction == lfExactlyHalf && category != fcZero) return APInt::tcExtractBit(significandParts(), bit); return false; @@ -1092,6 +1224,7 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode, case rmTowardNegative: return sign == true; } + llvm_unreachable("Invalid rounding mode found"); } APFloat::opStatus @@ -1101,30 +1234,30 @@ APFloat::normalize(roundingMode rounding_mode, unsigned int omsb; /* One, not zero, based MSB. */ int exponentChange; - if(category != fcNormal) + if (!isFiniteNonZero()) return opOK; /* Before rounding normalize the exponent of fcNormal numbers. */ omsb = significandMSB() + 1; - if(omsb) { + if (omsb) { /* OMSB is numbered from 1. We want to place it in the integer - bit numbered PRECISON if possible, with a compensating change in + bit numbered PRECISION if possible, with a compensating change in the exponent. */ exponentChange = omsb - semantics->precision; /* If the resulting exponent is too high, overflow according to the rounding mode. */ - if(exponent + exponentChange > semantics->maxExponent) + if (exponent + exponentChange > semantics->maxExponent) return handleOverflow(rounding_mode); /* Subnormal numbers have exponent minExponent, and their MSB is forced based on that. */ - if(exponent + exponentChange < semantics->minExponent) + if (exponent + exponentChange < semantics->minExponent) exponentChange = semantics->minExponent - exponent; /* Shifting left is easy as we don't lose precision. */ - if(exponentChange < 0) { + if (exponentChange < 0) { assert(lost_fraction == lfExactlyZero); shiftSignificandLeft(-exponentChange); @@ -1132,7 +1265,7 @@ APFloat::normalize(roundingMode rounding_mode, return opOK; } - if(exponentChange > 0) { + if (exponentChange > 0) { lostFraction lf; /* Shift right and capture any new lost fraction. */ @@ -1141,7 +1274,7 @@ APFloat::normalize(roundingMode rounding_mode, lost_fraction = combineLostFractions(lf, lost_fraction); /* Keep OMSB up-to-date. */ - if(omsb > (unsigned) exponentChange) + if (omsb > (unsigned) exponentChange) omsb -= exponentChange; else omsb = 0; @@ -1153,28 +1286,28 @@ APFloat::normalize(roundingMode rounding_mode, /* As specified in IEEE 754, since we do not trap we do not report underflow for exact results. */ - if(lost_fraction == lfExactlyZero) { + if (lost_fraction == lfExactlyZero) { /* Canonicalize zeroes. */ - if(omsb == 0) + if (omsb == 0) category = fcZero; return opOK; } /* Increment the significand if we're rounding away from zero. */ - if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) { - if(omsb == 0) + if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) { + if (omsb == 0) exponent = semantics->minExponent; incrementSignificand(); omsb = significandMSB() + 1; /* Did the significand increment overflow? */ - if(omsb == (unsigned) semantics->precision + 1) { + if (omsb == (unsigned) semantics->precision + 1) { /* Renormalize by incrementing the exponent and shifting our significand right one. However if we already have the maximum exponent we overflow to infinity. */ - if(exponent == semantics->maxExponent) { + if (exponent == semantics->maxExponent) { category = fcInfinity; return (opStatus) (opOverflow | opInexact); @@ -1188,14 +1321,14 @@ APFloat::normalize(roundingMode rounding_mode, /* The normal case - we were and are not denormal, and any significand increment above didn't overflow. */ - if(omsb == semantics->precision) + if (omsb == semantics->precision) return opInexact; /* We have a non-zero denormal. */ assert(omsb < semantics->precision); /* Canonicalize zeroes. */ - if(omsb == 0) + if (omsb == 0) category = fcZero; /* The fcZero case is a denormal that underflowed to zero. */ @@ -1205,52 +1338,52 @@ APFloat::normalize(roundingMode rounding_mode, APFloat::opStatus APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) { - switch(convolve(category, rhs.category)) { + switch (PackCategoriesIntoKey(category, rhs.category)) { default: - assert(0); - - case convolve(fcNaN, fcZero): - case convolve(fcNaN, fcNormal): - case convolve(fcNaN, fcInfinity): - case convolve(fcNaN, fcNaN): - case convolve(fcNormal, fcZero): - case convolve(fcInfinity, fcNormal): - case convolve(fcInfinity, fcZero): + llvm_unreachable(0); + + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcInfinity, fcZero): return opOK; - case convolve(fcZero, fcNaN): - case convolve(fcNormal, fcNaN): - case convolve(fcInfinity, fcNaN): + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): category = fcNaN; copySignificand(rhs); return opOK; - case convolve(fcNormal, fcInfinity): - case convolve(fcZero, fcInfinity): + case PackCategoriesIntoKey(fcNormal, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcInfinity): category = fcInfinity; sign = rhs.sign ^ subtract; return opOK; - case convolve(fcZero, fcNormal): + case PackCategoriesIntoKey(fcZero, fcNormal): assign(rhs); sign = rhs.sign ^ subtract; return opOK; - case convolve(fcZero, fcZero): + case PackCategoriesIntoKey(fcZero, fcZero): /* Sign depends on rounding mode; handled by caller. */ return opOK; - case convolve(fcInfinity, fcInfinity): + case PackCategoriesIntoKey(fcInfinity, fcInfinity): /* Differently signed infinities can only be validly subtracted. */ - if((sign ^ rhs.sign) != subtract) { + if (((sign ^ rhs.sign)!=0) != subtract) { makeNaN(); return opInvalidOp; } return opOK; - case convolve(fcNormal, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcNormal): return opDivByZero; } } @@ -1271,7 +1404,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) bits = exponent - rhs.exponent; /* Subtraction is more subtle than one might naively expect. */ - if(subtract) { + if (subtract) { APFloat temp_rhs(rhs); bool reverse; @@ -1300,16 +1433,17 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) /* Invert the lost fraction - it was on the RHS and subtracted. */ - if(lost_fraction == lfLessThanHalf) + if (lost_fraction == lfLessThanHalf) lost_fraction = lfMoreThanHalf; - else if(lost_fraction == lfMoreThanHalf) + else if (lost_fraction == lfMoreThanHalf) lost_fraction = lfLessThanHalf; /* The code above is intended to ensure that no borrow is necessary. */ assert(!carry); + (void)carry; } else { - if(bits > 0) { + if (bits > 0) { APFloat temp_rhs(rhs); lost_fraction = temp_rhs.shiftSignificandRight(bits); @@ -1321,6 +1455,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) /* We have a guard bit; generating a carry cannot happen. */ assert(!carry); + (void)carry; } return lost_fraction; @@ -1329,41 +1464,41 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) APFloat::opStatus APFloat::multiplySpecials(const APFloat &rhs) { - switch(convolve(category, rhs.category)) { + switch (PackCategoriesIntoKey(category, rhs.category)) { default: - assert(0); + llvm_unreachable(0); - case convolve(fcNaN, fcZero): - case convolve(fcNaN, fcNormal): - case convolve(fcNaN, fcInfinity): - case convolve(fcNaN, fcNaN): + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): return opOK; - case convolve(fcZero, fcNaN): - case convolve(fcNormal, fcNaN): - case convolve(fcInfinity, fcNaN): + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): category = fcNaN; copySignificand(rhs); return opOK; - case convolve(fcNormal, fcInfinity): - case convolve(fcInfinity, fcNormal): - case convolve(fcInfinity, fcInfinity): + case PackCategoriesIntoKey(fcNormal, fcInfinity): + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcInfinity, fcInfinity): category = fcInfinity; return opOK; - case convolve(fcZero, fcNormal): - case convolve(fcNormal, fcZero): - case convolve(fcZero, fcZero): + case PackCategoriesIntoKey(fcZero, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcZero): + case PackCategoriesIntoKey(fcZero, fcZero): category = fcZero; return opOK; - case convolve(fcZero, fcInfinity): - case convolve(fcInfinity, fcZero): + case PackCategoriesIntoKey(fcZero, fcInfinity): + case PackCategoriesIntoKey(fcInfinity, fcZero): makeNaN(); return opInvalidOp; - case convolve(fcNormal, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcNormal): return opOK; } } @@ -1371,41 +1506,77 @@ APFloat::multiplySpecials(const APFloat &rhs) APFloat::opStatus APFloat::divideSpecials(const APFloat &rhs) { - switch(convolve(category, rhs.category)) { + switch (PackCategoriesIntoKey(category, rhs.category)) { default: - assert(0); - - case convolve(fcNaN, fcZero): - case convolve(fcNaN, fcNormal): - case convolve(fcNaN, fcInfinity): - case convolve(fcNaN, fcNaN): - case convolve(fcInfinity, fcZero): - case convolve(fcInfinity, fcNormal): - case convolve(fcZero, fcInfinity): - case convolve(fcZero, fcNormal): + llvm_unreachable(0); + + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcZero, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcNormal): return opOK; - case convolve(fcZero, fcNaN): - case convolve(fcNormal, fcNaN): - case convolve(fcInfinity, fcNaN): + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): category = fcNaN; copySignificand(rhs); return opOK; - case convolve(fcNormal, fcInfinity): + case PackCategoriesIntoKey(fcNormal, fcInfinity): category = fcZero; return opOK; - case convolve(fcNormal, fcZero): + case PackCategoriesIntoKey(fcNormal, fcZero): category = fcInfinity; return opDivByZero; - case convolve(fcInfinity, fcInfinity): - case convolve(fcZero, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcZero): + makeNaN(); + return opInvalidOp; + + case PackCategoriesIntoKey(fcNormal, fcNormal): + return opOK; + } +} + +APFloat::opStatus +APFloat::modSpecials(const APFloat &rhs) +{ + switch (PackCategoriesIntoKey(category, rhs.category)) { + default: + llvm_unreachable(0); + + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + case PackCategoriesIntoKey(fcZero, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcInfinity): + return opOK; + + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): + category = fcNaN; + copySignificand(rhs); + return opOK; + + case PackCategoriesIntoKey(fcNormal, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcInfinity, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcZero): makeNaN(); return opInvalidOp; - case convolve(fcNormal, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcNormal): return opOK; } } @@ -1439,12 +1610,10 @@ APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, { opStatus fs; - assertArithmeticOK(*semantics); - fs = addOrSubtractSpecials(rhs, subtract); /* This return code means it was not a simple case. */ - if(fs == opDivByZero) { + if (fs == opDivByZero) { lostFraction lost_fraction; lost_fraction = addOrSubtractSignificand(rhs, subtract); @@ -1457,8 +1626,8 @@ APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a positive zero unless rounding to minus infinity, except that adding two like-signed zeroes gives that zero. */ - if(category == fcZero) { - if(rhs.category != fcZero || (sign == rhs.sign) == subtract) + if (category == fcZero) { + if (rhs.category != fcZero || (sign == rhs.sign) == subtract) sign = (rounding_mode == rmTowardNegative); } @@ -1485,14 +1654,13 @@ APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) { opStatus fs; - assertArithmeticOK(*semantics); sign ^= rhs.sign; fs = multiplySpecials(rhs); - if(category == fcNormal) { + if (isFiniteNonZero()) { lostFraction lost_fraction = multiplySignificand(rhs, 0); fs = normalize(rounding_mode, lost_fraction); - if(lost_fraction != lfExactlyZero) + if (lost_fraction != lfExactlyZero) fs = (opStatus) (fs | opInexact); } @@ -1505,29 +1673,27 @@ APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) { opStatus fs; - assertArithmeticOK(*semantics); sign ^= rhs.sign; fs = divideSpecials(rhs); - if(category == fcNormal) { + if (isFiniteNonZero()) { lostFraction lost_fraction = divideSignificand(rhs); fs = normalize(rounding_mode, lost_fraction); - if(lost_fraction != lfExactlyZero) + if (lost_fraction != lfExactlyZero) fs = (opStatus) (fs | opInexact); } return fs; } -/* Normalized remainder. This is not currently doing TRT. */ +/* Normalized remainder. This is not currently correct in all cases. */ APFloat::opStatus -APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) +APFloat::remainder(const APFloat &rhs) { opStatus fs; APFloat V = *this; unsigned int origSign = sign; - assertArithmeticOK(*semantics); fs = V.divide(rhs, rmNearestTiesToEven); if (fs == opDivByZero) return fs; @@ -1544,10 +1710,10 @@ APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) rmNearestTiesToEven); assert(fs==opOK); // should always work - fs = V.multiply(rhs, rounding_mode); + fs = V.multiply(rhs, rmNearestTiesToEven); assert(fs==opOK || fs==opInexact); // should not overflow or underflow - fs = subtract(V, rounding_mode); + fs = subtract(V, rmNearestTiesToEven); assert(fs==opOK || fs==opInexact); // likewise if (isZero()) @@ -1556,6 +1722,47 @@ APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) return fs; } +/* Normalized llvm frem (C fmod). + This is not currently correct in all cases. */ +APFloat::opStatus +APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) +{ + opStatus fs; + fs = modSpecials(rhs); + + if (isFiniteNonZero() && rhs.isFiniteNonZero()) { + APFloat V = *this; + unsigned int origSign = sign; + + fs = V.divide(rhs, rmNearestTiesToEven); + if (fs == opDivByZero) + return fs; + + int parts = partCount(); + integerPart *x = new integerPart[parts]; + bool ignored; + fs = V.convertToInteger(x, parts * integerPartWidth, true, + rmTowardZero, &ignored); + if (fs==opInvalidOp) + return fs; + + fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, + rmNearestTiesToEven); + assert(fs==opOK); // should always work + + fs = V.multiply(rhs, rounding_mode); + assert(fs==opOK || fs==opInexact); // should not overflow or underflow + + fs = subtract(V, rounding_mode); + assert(fs==opOK || fs==opInexact); // likewise + + if (isZero()) + sign = origSign; // IEEE754 requires this + delete[] x; + } + return fs; +} + /* Normalized fused-multiply-add. */ APFloat::opStatus APFloat::fusedMultiplyAdd(const APFloat &multiplicand, @@ -1564,27 +1771,25 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand, { opStatus fs; - assertArithmeticOK(*semantics); - /* Post-multiplication sign, before addition. */ sign ^= multiplicand.sign; /* If and only if all arguments are normal do we need to do an extended-precision calculation. */ - if(category == fcNormal - && multiplicand.category == fcNormal - && addend.category == fcNormal) { + if (isFiniteNonZero() && + multiplicand.isFiniteNonZero() && + addend.isFiniteNonZero()) { lostFraction lost_fraction; lost_fraction = multiplySignificand(multiplicand, &addend); fs = normalize(rounding_mode, lost_fraction); - if(lost_fraction != lfExactlyZero) + if (lost_fraction != lfExactlyZero) fs = (opStatus) (fs | opInexact); /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a positive zero unless rounding to minus infinity, except that adding two like-signed zeroes gives that zero. */ - if(category == fcZero && sign != addend.sign) + if (category == fcZero && sign != addend.sign) sign = (rounding_mode == rmTowardNegative); } else { fs = multiplySpecials(multiplicand); @@ -1596,69 +1801,111 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand, If we need to do the addition we can do so with normal precision. */ - if(fs == opOK) + if (fs == opOK) fs = addOrSubtract(addend, rounding_mode, false); } return fs; } +/* Rounding-mode corrrect round to integral value. */ +APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { + opStatus fs; + + // If the exponent is large enough, we know that this value is already + // integral, and the arithmetic below would potentially cause it to saturate + // to +/-Inf. Bail out early instead. + if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) + return opOK; + + // The algorithm here is quite simple: we add 2^(p-1), where p is the + // precision of our format, and then subtract it back off again. The choice + // of rounding modes for the addition/subtraction determines the rounding mode + // for our integral rounding as well. + // NOTE: When the input value is negative, we do subtraction followed by + // addition instead. + APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); + IntegerConstant <<= semanticsPrecision(*semantics)-1; + APFloat MagicConstant(*semantics); + fs = MagicConstant.convertFromAPInt(IntegerConstant, false, + rmNearestTiesToEven); + MagicConstant.copySign(*this); + + if (fs != opOK) + return fs; + + // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. + bool inputSign = isNegative(); + + fs = add(MagicConstant, rounding_mode); + if (fs != opOK && fs != opInexact) + return fs; + + fs = subtract(MagicConstant, rounding_mode); + + // Restore the input sign. + if (inputSign != isNegative()) + changeSign(); + + return fs; +} + + /* Comparison requires normalized numbers. */ APFloat::cmpResult APFloat::compare(const APFloat &rhs) const { cmpResult result; - assertArithmeticOK(*semantics); assert(semantics == rhs.semantics); - switch(convolve(category, rhs.category)) { + switch (PackCategoriesIntoKey(category, rhs.category)) { default: - assert(0); - - case convolve(fcNaN, fcZero): - case convolve(fcNaN, fcNormal): - case convolve(fcNaN, fcInfinity): - case convolve(fcNaN, fcNaN): - case convolve(fcZero, fcNaN): - case convolve(fcNormal, fcNaN): - case convolve(fcInfinity, fcNaN): + llvm_unreachable(0); + + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): return cmpUnordered; - case convolve(fcInfinity, fcNormal): - case convolve(fcInfinity, fcZero): - case convolve(fcNormal, fcZero): - if(sign) + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcInfinity, fcZero): + case PackCategoriesIntoKey(fcNormal, fcZero): + if (sign) return cmpLessThan; else return cmpGreaterThan; - case convolve(fcNormal, fcInfinity): - case convolve(fcZero, fcInfinity): - case convolve(fcZero, fcNormal): - if(rhs.sign) + case PackCategoriesIntoKey(fcNormal, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcNormal): + if (rhs.sign) return cmpGreaterThan; else return cmpLessThan; - case convolve(fcInfinity, fcInfinity): - if(sign == rhs.sign) + case PackCategoriesIntoKey(fcInfinity, fcInfinity): + if (sign == rhs.sign) return cmpEqual; - else if(sign) + else if (sign) return cmpLessThan; else return cmpGreaterThan; - case convolve(fcZero, fcZero): + case PackCategoriesIntoKey(fcZero, fcZero): return cmpEqual; - case convolve(fcNormal, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcNormal): break; } /* Two normal numbers. Do they have the same sign? */ - if(sign != rhs.sign) { - if(sign) + if (sign != rhs.sign) { + if (sign) result = cmpLessThan; else result = cmpGreaterThan; @@ -1666,10 +1913,10 @@ APFloat::compare(const APFloat &rhs) const /* Compare absolute values; invert result if negative. */ result = compareAbsoluteValue(rhs); - if(sign) { - if(result == cmpLessThan) + if (sign) { + if (result == cmpLessThan) result = cmpGreaterThan; - else if(result == cmpGreaterThan) + else if (result == cmpGreaterThan) result = cmpLessThan; } } @@ -1691,82 +1938,74 @@ APFloat::convert(const fltSemantics &toSemantics, lostFraction lostFraction; unsigned int newPartCount, oldPartCount; opStatus fs; + int shift; + const fltSemantics &fromSemantics = *semantics; - assertArithmeticOK(*semantics); - assertArithmeticOK(toSemantics); lostFraction = lfExactlyZero; newPartCount = partCountForBits(toSemantics.precision + 1); oldPartCount = partCount(); + shift = toSemantics.precision - fromSemantics.precision; - /* Handle storage complications. If our new form is wider, - re-allocate our bit pattern into wider storage. If it is - narrower, we ignore the excess parts, but if narrowing to a - single part we need to free the old storage. - Be careful not to reference significandParts for zeroes - and infinities, since it aborts. */ + bool X86SpecialNan = false; + if (&fromSemantics == &APFloat::x87DoubleExtended && + &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN && + (!(*significandParts() & 0x8000000000000000ULL) || + !(*significandParts() & 0x4000000000000000ULL))) { + // x86 has some unusual NaNs which cannot be represented in any other + // format; note them here. + X86SpecialNan = true; + } + + // If this is a truncation, perform the shift before we narrow the storage. + if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) + lostFraction = shiftRight(significandParts(), oldPartCount, -shift); + + // Fix the storage so it can hold to new value. if (newPartCount > oldPartCount) { + // The new type requires more storage; make it available. integerPart *newParts; newParts = new integerPart[newPartCount]; APInt::tcSet(newParts, 0, newPartCount); - if (category==fcNormal || category==fcNaN) + if (isFiniteNonZero() || category==fcNaN) APInt::tcAssign(newParts, significandParts(), oldPartCount); freeSignificand(); significand.parts = newParts; - } else if (newPartCount < oldPartCount) { - /* Capture any lost fraction through truncation of parts so we get - correct rounding whilst normalizing. */ - if (category==fcNormal) - lostFraction = lostFractionThroughTruncation - (significandParts(), oldPartCount, toSemantics.precision); - if (newPartCount == 1) { - integerPart newPart = 0; - if (category==fcNormal || category==fcNaN) - newPart = significandParts()[0]; - freeSignificand(); - significand.part = newPart; - } + } else if (newPartCount == 1 && oldPartCount != 1) { + // Switch to built-in storage for a single part. + integerPart newPart = 0; + if (isFiniteNonZero() || category==fcNaN) + newPart = significandParts()[0]; + freeSignificand(); + significand.part = newPart; } - if(category == fcNormal) { - /* Re-interpret our bit-pattern. */ - exponent += toSemantics.precision - semantics->precision; - semantics = &toSemantics; + // Now that we have the right storage, switch the semantics. + semantics = &toSemantics; + + // If this is an extension, perform the shift now that the storage is + // available. + if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) + APInt::tcShiftLeft(significandParts(), newPartCount, shift); + + if (isFiniteNonZero()) { fs = normalize(rounding_mode, lostFraction); *losesInfo = (fs != opOK); } else if (category == fcNaN) { - int shift = toSemantics.precision - semantics->precision; - // Do this now so significandParts gets the right answer - const fltSemantics *oldSemantics = semantics; - semantics = &toSemantics; - *losesInfo = false; - // No normalization here, just truncate - if (shift>0) - APInt::tcShiftLeft(significandParts(), newPartCount, shift); - else if (shift < 0) { - unsigned ushift = -shift; - // Figure out if we are losing information. This happens - // if are shifting out something other than 0s, or if the x87 long - // double input did not have its integer bit set (pseudo-NaN), or if the - // x87 long double input did not have its QNan bit set (because the x87 - // hardware sets this bit when converting a lower-precision NaN to - // x87 long double). - if (APInt::tcLSB(significandParts(), newPartCount) < ushift) - *losesInfo = true; - if (oldSemantics == &APFloat::x87DoubleExtended && - (!(*significandParts() & 0x8000000000000000ULL) || - !(*significandParts() & 0x4000000000000000ULL))) - *losesInfo = true; - APInt::tcShiftRight(significandParts(), newPartCount, ushift); - } + *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; + + // For x87 extended precision, we want to make a NaN, not a special NaN if + // the input wasn't special either. + if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended) + APInt::tcSetBit(significandParts(), semantics->precision - 1); + // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) // does not give you back the same bits. This is dubious, and we // don't currently do it. You're really supposed to get // an invalid operation signal at runtime, but nobody does that. fs = opOK; } else { - semantics = &toSemantics; - fs = opOK; *losesInfo = false; + fs = opOK; } return fs; @@ -1792,18 +2031,15 @@ APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, const integerPart *src; unsigned int dstPartsCount, truncatedBits; - if (!isArithmeticOk(*semantics)) - return opInvalidOp; - *isExact = false; /* Handle the three special cases first. */ - if(category == fcInfinity || category == fcNaN) + if (category == fcInfinity || category == fcNaN) return opInvalidOp; dstPartsCount = partCountForBits(width); - if(category == fcZero) { + if (category == fcZero) { APInt::tcSet(parts, 0, dstPartsCount); // Negative zero can't be represented as an int. *isExact = !sign; @@ -1817,7 +2053,9 @@ APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, if (exponent < 0) { /* Our absolute value is less than one; truncate everything. */ APInt::tcSet(parts, 0, dstPartsCount); - truncatedBits = semantics->precision; + /* For exponent -1 the integer bit represents .5, look at that. + For smaller exponents leftmost truncated bit is 0. */ + truncatedBits = semantics->precision -1U - exponent; } else { /* We want the most significant (exponent + 1) bits; the rest are truncated. */ @@ -1844,8 +2082,8 @@ APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, if (truncatedBits) { lost_fraction = lostFractionThroughTruncation(src, partCount(), truncatedBits); - if (lost_fraction != lfExactlyZero - && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { + if (lost_fraction != lfExactlyZero && + roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { if (APInt::tcIncrement(parts, dstPartsCount)) return opInvalidOp; /* Overflow. */ } @@ -1902,7 +2140,7 @@ APFloat::convertToInteger(integerPart *parts, unsigned int width, { opStatus fs; - fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, + fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, isExact); if (fs == opInvalidOp) { @@ -1925,6 +2163,23 @@ APFloat::convertToInteger(integerPart *parts, unsigned int width, return fs; } +/* Same as convertToInteger(integerPart*, ...), except the result is returned in + an APSInt, whose initial bit-width and signed-ness are used to determine the + precision of the conversion. + */ +APFloat::opStatus +APFloat::convertToInteger(APSInt &result, + roundingMode rounding_mode, bool *isExact) const +{ + unsigned bitWidth = result.getBitWidth(); + SmallVector parts(result.getNumWords()); + opStatus status = convertToInteger( + parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact); + // Keeps the original signed-ness. + result = APInt(bitWidth, parts); + return status; +} + /* Convert an unsigned integer SRC to a floating point number, rounding according to ROUNDING_MODE. The sign of the floating point number is not modified. */ @@ -1937,14 +2192,13 @@ APFloat::convertFromUnsignedParts(const integerPart *src, integerPart *dst; lostFraction lost_fraction; - assertArithmeticOK(*semantics); category = fcNormal; omsb = APInt::tcMSB(src, srcCount) + 1; dst = significandParts(); dstCount = partCount(); precision = semantics->precision; - /* We want the most significant PRECISON bits of SRC. There may not + /* We want the most significant PRECISION bits of SRC. There may not be that many; extract what we can. */ if (precision <= omsb) { exponent = omsb - 1; @@ -1988,9 +2242,8 @@ APFloat::convertFromSignExtendedInteger(const integerPart *src, { opStatus status; - assertArithmeticOK(*semantics); - if (isSigned - && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { + if (isSigned && + APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { integerPart *copy; /* If we're signed and negative negate a copy. */ @@ -2015,10 +2268,10 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts, roundingMode rounding_mode) { unsigned int partCount = partCountForBits(width); - APInt api = APInt(width, partCount, parts); + APInt api = APInt(width, makeArrayRef(parts, partCount)); sign = false; - if(isSigned && APInt::tcExtractBit(parts, width - 1)) { + if (isSigned && APInt::tcExtractBit(parts, width - 1)) { sign = true; api = -api; } @@ -2027,13 +2280,12 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts, } APFloat::opStatus -APFloat::convertFromHexadecimalString(const char *p, - roundingMode rounding_mode) +APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) { - lostFraction lost_fraction; + lostFraction lost_fraction = lfExactlyZero; integerPart *significand; unsigned int bitPos, partsCount; - const char *dot, *firstSignificantDigit; + StringRef::iterator dot, firstSignificantDigit; zeroSignificand(); exponent = 0; @@ -2044,53 +2296,64 @@ APFloat::convertFromHexadecimalString(const char *p, bitPos = partsCount * integerPartWidth; /* Skip leading zeroes and any (hexa)decimal point. */ - p = skipLeadingZeroesAndAnyDot(p, &dot); + StringRef::iterator begin = s.begin(); + StringRef::iterator end = s.end(); + StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); firstSignificantDigit = p; - for(;;) { + for (; p != end;) { integerPart hex_value; - if(*p == '.') { - assert(dot == 0); + if (*p == '.') { + assert(dot == end && "String contains multiple dots"); dot = p++; + if (p == end) { + break; + } } hex_value = hexDigitValue(*p); - if(hex_value == -1U) { - lost_fraction = lfExactlyZero; + if (hex_value == -1U) { break; } p++; - /* Store the number whilst 4-bit nibbles remain. */ - if(bitPos) { - bitPos -= 4; - hex_value <<= bitPos % integerPartWidth; - significand[bitPos / integerPartWidth] |= hex_value; - } else { - lost_fraction = trailingHexadecimalFraction(p, hex_value); - while(hexDigitValue(*p) != -1U) - p++; + if (p == end) { break; + } else { + /* Store the number whilst 4-bit nibbles remain. */ + if (bitPos) { + bitPos -= 4; + hex_value <<= bitPos % integerPartWidth; + significand[bitPos / integerPartWidth] |= hex_value; + } else { + lost_fraction = trailingHexadecimalFraction(p, end, hex_value); + while (p != end && hexDigitValue(*p) != -1U) + p++; + break; + } } } /* Hex floats require an exponent but not a hexadecimal point. */ - assert(*p == 'p' || *p == 'P'); + assert(p != end && "Hex strings require an exponent"); + assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); + assert(p != begin && "Significand has no digits"); + assert((dot == end || p - begin != 1) && "Significand has no digits"); /* Ignore the exponent if we are zero. */ - if(p != firstSignificantDigit) { + if (p != firstSignificantDigit) { int expAdjustment; /* Implicit hexadecimal point? */ - if(!dot) + if (dot == end) dot = p; /* Calculate the exponent adjustment implicit in the number of significant digits. */ expAdjustment = static_cast(dot - firstSignificantDigit); - if(expAdjustment < 0) + if (expAdjustment < 0) expAdjustment++; expAdjustment = expAdjustment * 4 - 1; @@ -2100,7 +2363,7 @@ APFloat::convertFromHexadecimalString(const char *p, expAdjustment -= partsCount * integerPartWidth; /* Adjust for the given exponent. */ - exponent = totalExponent(p, expAdjustment); + exponent = totalExponent(p + 1, end, expAdjustment); } return normalize(rounding_mode, lost_fraction); @@ -2112,12 +2375,12 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, roundingMode rounding_mode) { unsigned int parts, pow5PartCount; - fltSemantics calcSemantics = { 32767, -32767, 0, true }; + fltSemantics calcSemantics = { 32767, -32767, 0 }; integerPart pow5Parts[maxPowerOfFiveParts]; bool isNearest; - isNearest = (rounding_mode == rmNearestTiesToEven - || rounding_mode == rmNearestTiesToAway); + isNearest = (rounding_mode == rmNearestTiesToEven || + rounding_mode == rmNearestTiesToAway); parts = partCountForBits(semantics->precision + 11); @@ -2132,8 +2395,8 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, excessPrecision = calcSemantics.precision - semantics->precision; truncatedBits = excessPrecision; - APFloat decSig(calcSemantics, fcZero, sign); - APFloat pow5(calcSemantics, fcZero, false); + APFloat decSig = APFloat::getZero(calcSemantics, sign); + APFloat pow5(calcSemantics); sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, rmNearestTiesToEven); @@ -2165,8 +2428,8 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, /* Both multiplySignificand and divideSignificand return the result with the integer bit set. */ - assert (APInt::tcExtractBit - (decSig.significandParts(), calcSemantics.precision - 1) == 1); + assert(APInt::tcExtractBit + (decSig.significandParts(), calcSemantics.precision - 1) == 1); HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, powHUerr); @@ -2192,13 +2455,14 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, } APFloat::opStatus -APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) +APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { decimalInfo D; opStatus fs; /* Scan the text. */ - interpretDecimal(p, &D); + StringRef::iterator p = str.begin(); + interpretDecimal(p, str.end(), &D); /* Handle the quick cases. First the case of no significant digits, i.e. zero, and then exponents that are obviously too large or too @@ -2217,14 +2481,34 @@ APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] */ - if (decDigitValue(*D.firstSigDigit) >= 10U) { + // Test if we have a zero number allowing for strings with no null terminators + // and zero decimals with non-zero exponents. + // + // We computed firstSigDigit by ignoring all zeros and dots. Thus if + // D->firstSigDigit equals str.end(), every digit must be a zero and there can + // be at most one dot. On the other hand, if we have a zero with a non-zero + // exponent, then we know that D.firstSigDigit will be non-numeric. + if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { category = fcZero; fs = opOK; - } else if ((D.normalizedExponent + 1) * 28738 - <= 8651 * (semantics->minExponent - (int) semantics->precision)) { + + /* Check whether the normalized exponent is high enough to overflow + max during the log-rebasing in the max-exponent check below. */ + } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { + fs = handleOverflow(rounding_mode); + + /* If it wasn't, then it also wasn't high enough to overflow max + during the log-rebasing in the min-exponent check. Check that it + won't overflow min in either check, then perform the min-exponent + check. */ + } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || + (D.normalizedExponent + 1) * 28738 <= + 8651 * (semantics->minExponent - (int) semantics->precision)) { /* Underflow to zero and round. */ zeroSignificand(); fs = normalize(rounding_mode, lfLessThanHalf); + + /* We can finally safely perform the max-exponent check. */ } else if ((D.normalizedExponent - 1) * 42039 >= 12655 * semantics->maxExponent) { /* Overflow and round. */ @@ -2253,10 +2537,14 @@ APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) multiplier = 1; do { - if (*p == '.') + if (*p == '.') { p++; - + if (p == str.end()) { + break; + } + } decValue = decDigitValue(*p++); + assert(decValue < 10U && "Invalid character in significand"); multiplier *= 10; val = val * 10 + decValue; /* The maximum number that can be multiplied by ten with any @@ -2283,21 +2571,57 @@ APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) return fs; } -APFloat::opStatus -APFloat::convertFromString(const char *p, roundingMode rounding_mode) -{ - assertArithmeticOK(*semantics); - - /* Handle a leading minus sign. */ - if(*p == '-') - sign = 1, p++; - else - sign = 0; +bool +APFloat::convertFromStringSpecials(StringRef str) { + if (str.equals("inf") || str.equals("INFINITY")) { + makeInf(false); + return true; + } - if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) - return convertFromHexadecimalString(p + 2, rounding_mode); - else - return convertFromDecimalString(p, rounding_mode); + if (str.equals("-inf") || str.equals("-INFINITY")) { + makeInf(true); + return true; + } + + if (str.equals("nan") || str.equals("NaN")) { + makeNaN(false, false); + return true; + } + + if (str.equals("-nan") || str.equals("-NaN")) { + makeNaN(false, true); + return true; + } + + return false; +} + +APFloat::opStatus +APFloat::convertFromString(StringRef str, roundingMode rounding_mode) +{ + assert(!str.empty() && "Invalid string length"); + + // Handle special cases. + if (convertFromStringSpecials(str)) + return opOK; + + /* Handle a leading minus sign. */ + StringRef::iterator p = str.begin(); + size_t slen = str.size(); + sign = *p == '-' ? 1 : 0; + if (*p == '-' || *p == '+') { + p++; + slen--; + assert(slen && "String has no digits"); + } + + if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { + assert(slen - 2 && "Invalid string"); + return convertFromHexadecimalString(StringRef(p + 2, slen - 2), + rounding_mode); + } + + return convertFromDecimalString(StringRef(p, slen), rounding_mode); } /* Write out a hexadecimal representation of the floating point value @@ -2330,8 +2654,6 @@ APFloat::convertToHexString(char *dst, unsigned int hexDigits, { char *p; - assertArithmeticOK(*semantics); - p = dst; if (sign) *dst++ = '-'; @@ -2456,7 +2778,7 @@ APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, q--; *q = hexDigitChars[hexDigitValue (*q) + 1]; } while (*q == '0'); - assert (q >= p); + assert(q >= p); } else { /* Add trailing zeroes. */ memset (dst, '0', outputDigits); @@ -2478,21 +2800,19 @@ APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, return writeSignedDecimal (dst, exponent); } -// For good performance it is desirable for different APFloats -// to produce different integers. -uint32_t -APFloat::getHashValue() const -{ - if (category==fcZero) return sign<<8 | semantics->precision ; - else if (category==fcInfinity) return sign<<9 | semantics->precision; - else if (category==fcNaN) return 1<<10 | semantics->precision; - else { - uint32_t hash = sign<<11 | semantics->precision | exponent<<12; - const integerPart* p = significandParts(); - for (int i=partCount(); i>0; i--, p++) - hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32); - return hash; - } +hash_code llvm::hash_value(const APFloat &Arg) { + if (!Arg.isFiniteNonZero()) + return hash_combine((uint8_t)Arg.category, + // NaN has no sign, fix it at zero. + Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign, + Arg.semantics->precision); + + // Normal floats need their exponent and significand hashed. + return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign, + Arg.semantics->precision, Arg.exponent, + hash_combine_range( + Arg.significandParts(), + Arg.significandParts() + Arg.partCount())); } // Conversion from APFloat to/from host float/double. It may eventually be @@ -2508,11 +2828,11 @@ APInt APFloat::convertF80LongDoubleAPFloatToAPInt() const { assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); - assert (partCount()==2); + assert(partCount()==2); uint64_t myexponent, mysignificand; - if (category==fcNormal) { + if (isFiniteNonZero()) { myexponent = exponent+16383; //bias mysignificand = significandParts()[0]; if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) @@ -2530,67 +2850,108 @@ APFloat::convertF80LongDoubleAPFloatToAPInt() const } uint64_t words[2]; - words[0] = ((uint64_t)(sign & 1) << 63) | - ((myexponent & 0x7fffLL) << 48) | - ((mysignificand >>16) & 0xffffffffffffLL); - words[1] = mysignificand & 0xffff; - return APInt(80, 2, words); + words[0] = mysignificand; + words[1] = ((uint64_t)(sign & 1) << 15) | + (myexponent & 0x7fffLL); + return APInt(80, words); } APInt APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); - assert (partCount()==2); + assert(partCount()==2); - uint64_t myexponent, mysignificand, myexponent2, mysignificand2; + uint64_t words[2]; + opStatus fs; + bool losesInfo; + + // Convert number to double. To avoid spurious underflows, we re- + // normalize against the "double" minExponent first, and only *then* + // truncate the mantissa. The result of that second conversion + // may be inexact, but should never underflow. + // Declare fltSemantics before APFloat that uses it (and + // saves pointer to it) to ensure correct destruction order. + fltSemantics extendedSemantics = *semantics; + extendedSemantics.minExponent = IEEEdouble.minExponent; + APFloat extended(*this); + fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + + APFloat u(extended); + fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK || fs == opInexact); + (void)fs; + words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); + + // If conversion was exact or resulted in a special case, we're done; + // just set the second double to zero. Otherwise, re-convert back to + // the extended format and compute the difference. This now should + // convert exactly to double. + if (u.isFiniteNonZero() && losesInfo) { + fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + + APFloat v(extended); + v.subtract(u, rmNearestTiesToEven); + fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); + } else { + words[1] = 0; + } + + return APInt(128, words); +} + +APInt +APFloat::convertQuadrupleAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEquad); + assert(partCount()==2); + + uint64_t myexponent, mysignificand, mysignificand2; - if (category==fcNormal) { - myexponent = exponent + 1023; //bias - myexponent2 = exponent2 + 1023; + if (isFiniteNonZero()) { + myexponent = exponent+16383; //bias mysignificand = significandParts()[0]; mysignificand2 = significandParts()[1]; - if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) + if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) myexponent = 0; // denormal - if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL)) - myexponent2 = 0; // denormal } else if (category==fcZero) { myexponent = 0; - mysignificand = 0; - myexponent2 = 0; - mysignificand2 = 0; + mysignificand = mysignificand2 = 0; } else if (category==fcInfinity) { - myexponent = 0x7ff; - myexponent2 = 0; - mysignificand = 0; - mysignificand2 = 0; + myexponent = 0x7fff; + mysignificand = mysignificand2 = 0; } else { - assert(category == fcNaN && "Unknown category"); - myexponent = 0x7ff; + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x7fff; mysignificand = significandParts()[0]; - myexponent2 = exponent2; mysignificand2 = significandParts()[1]; } uint64_t words[2]; - words[0] = ((uint64_t)(sign & 1) << 63) | - ((myexponent & 0x7ff) << 52) | - (mysignificand & 0xfffffffffffffLL); - words[1] = ((uint64_t)(sign2 & 1) << 63) | - ((myexponent2 & 0x7ff) << 52) | - (mysignificand2 & 0xfffffffffffffLL); - return APInt(128, 2, words); + words[0] = mysignificand; + words[1] = ((uint64_t)(sign & 1) << 63) | + ((myexponent & 0x7fff) << 48) | + (mysignificand2 & 0xffffffffffffLL); + + return APInt(128, words); } APInt APFloat::convertDoubleAPFloatToAPInt() const { assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); - assert (partCount()==1); + assert(partCount()==1); uint64_t myexponent, mysignificand; - if (category==fcNormal) { + if (isFiniteNonZero()) { myexponent = exponent+1023; //bias mysignificand = *significandParts(); if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) @@ -2616,11 +2977,11 @@ APInt APFloat::convertFloatAPFloatToAPInt() const { assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); - assert (partCount()==1); + assert(partCount()==1); uint32_t myexponent, mysignificand; - if (category==fcNormal) { + if (isFiniteNonZero()) { myexponent = exponent+127; //bias mysignificand = (uint32_t)*significandParts(); if (myexponent == 1 && !(mysignificand & 0x800000)) @@ -2641,6 +3002,35 @@ APFloat::convertFloatAPFloatToAPInt() const (mysignificand & 0x7fffff))); } +APInt +APFloat::convertHalfAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); + assert(partCount()==1); + + uint32_t myexponent, mysignificand; + + if (isFiniteNonZero()) { + myexponent = exponent+15; //bias + mysignificand = (uint32_t)*significandParts(); + if (myexponent == 1 && !(mysignificand & 0x400)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0x1f; + mysignificand = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x1f; + mysignificand = (uint32_t)*significandParts(); + } + + return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | + (mysignificand & 0x3ff))); +} + // This function creates an APInt that is just a bit map of the floating // point constant as it would appear in memory. It is not a conversion, // and treating the result as a normal integer is unlikely to be useful. @@ -2648,12 +3038,18 @@ APFloat::convertFloatAPFloatToAPInt() const APInt APFloat::bitcastToAPInt() const { + if (semantics == (const llvm::fltSemantics*)&IEEEhalf) + return convertHalfAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics*)&IEEEsingle) return convertFloatAPFloatToAPInt(); - + if (semantics == (const llvm::fltSemantics*)&IEEEdouble) return convertDoubleAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics*)&IEEEquad) + return convertQuadrupleAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) return convertPPCDoubleDoubleAPFloatToAPInt(); @@ -2665,7 +3061,8 @@ APFloat::bitcastToAPInt() const float APFloat::convertToFloat() const { - assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); + assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && + "Float semantics are not IEEEsingle"); APInt api = bitcastToAPInt(); return api.bitsToFloat(); } @@ -2673,7 +3070,8 @@ APFloat::convertToFloat() const double APFloat::convertToDouble() const { - assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); + assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && + "Float semantics are not IEEEdouble"); APInt api = bitcastToAPInt(); return api.bitsToDouble(); } @@ -2691,14 +3089,13 @@ APFloat::initFromF80LongDoubleAPInt(const APInt &api) assert(api.getBitWidth()==80); uint64_t i1 = api.getRawData()[0]; uint64_t i2 = api.getRawData()[1]; - uint64_t myexponent = (i1 >> 48) & 0x7fff; - uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) | - (i2 & 0xffff); + uint64_t myexponent = (i2 & 0x7fff); + uint64_t mysignificand = i1; initialize(&APFloat::x87DoubleExtended); assert(partCount()==2); - sign = static_cast(i1>>63); + sign = static_cast(i2>>15); if (myexponent==0 && mysignificand==0) { // exponent, significand meaningless category = fcZero; @@ -2726,47 +3123,63 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) assert(api.getBitWidth()==128); uint64_t i1 = api.getRawData()[0]; uint64_t i2 = api.getRawData()[1]; - uint64_t myexponent = (i1 >> 52) & 0x7ff; - uint64_t mysignificand = i1 & 0xfffffffffffffLL; - uint64_t myexponent2 = (i2 >> 52) & 0x7ff; - uint64_t mysignificand2 = i2 & 0xfffffffffffffLL; + opStatus fs; + bool losesInfo; + + // Get the first double and convert to our format. + initFromDoubleAPInt(APInt(64, i1)); + fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + + // Unless we have a special case, add in second double. + if (isFiniteNonZero()) { + APFloat v(IEEEdouble, APInt(64, i2)); + fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + + add(v, rmNearestTiesToEven); + } +} - initialize(&APFloat::PPCDoubleDouble); +void +APFloat::initFromQuadrupleAPInt(const APInt &api) +{ + assert(api.getBitWidth()==128); + uint64_t i1 = api.getRawData()[0]; + uint64_t i2 = api.getRawData()[1]; + uint64_t myexponent = (i2 >> 48) & 0x7fff; + uint64_t mysignificand = i1; + uint64_t mysignificand2 = i2 & 0xffffffffffffLL; + + initialize(&APFloat::IEEEquad); assert(partCount()==2); - sign = static_cast(i1>>63); - sign2 = static_cast(i2>>63); - if (myexponent==0 && mysignificand==0) { + sign = static_cast(i2>>63); + if (myexponent==0 && + (mysignificand==0 && mysignificand2==0)) { // exponent, significand meaningless - // exponent2 and significand2 are required to be 0; we don't check category = fcZero; - } else if (myexponent==0x7ff && mysignificand==0) { + } else if (myexponent==0x7fff && + (mysignificand==0 && mysignificand2==0)) { // exponent, significand meaningless - // exponent2 and significand2 are required to be 0; we don't check category = fcInfinity; - } else if (myexponent==0x7ff && mysignificand!=0) { - // exponent meaningless. So is the whole second word, but keep it - // for determinism. + } else if (myexponent==0x7fff && + (mysignificand!=0 || mysignificand2 !=0)) { + // exponent meaningless category = fcNaN; - exponent2 = myexponent2; significandParts()[0] = mysignificand; significandParts()[1] = mysignificand2; } else { category = fcNormal; - // Note there is no category2; the second word is treated as if it is - // fcNormal, although it might be something else considered by itself. - exponent = myexponent - 1023; - exponent2 = myexponent2 - 1023; + exponent = myexponent - 16383; significandParts()[0] = mysignificand; significandParts()[1] = mysignificand2; if (myexponent==0) // denormal - exponent = -1022; - else - significandParts()[0] |= 0x10000000000000LL; // integer bit - if (myexponent2==0) - exponent2 = -1022; + exponent = -16382; else - significandParts()[1] |= 0x10000000000000LL; // integer bit + significandParts()[1] |= 0x1000000000000LL; // integer bit } } @@ -2836,38 +3249,623 @@ APFloat::initFromFloatAPInt(const APInt & api) } } +void +APFloat::initFromHalfAPInt(const APInt & api) +{ + assert(api.getBitWidth()==16); + uint32_t i = (uint32_t)*api.getRawData(); + uint32_t myexponent = (i >> 10) & 0x1f; + uint32_t mysignificand = i & 0x3ff; + + initialize(&APFloat::IEEEhalf); + assert(partCount()==1); + + sign = i >> 15; + if (myexponent==0 && mysignificand==0) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent==0x1f && mysignificand==0) { + // exponent, significand meaningless + category = fcInfinity; + } else if (myexponent==0x1f && mysignificand!=0) { + // sign, exponent, significand meaningless + category = fcNaN; + *significandParts() = mysignificand; + } else { + category = fcNormal; + exponent = myexponent - 15; //bias + *significandParts() = mysignificand; + if (myexponent==0) // denormal + exponent = -14; + else + *significandParts() |= 0x400; // integer bit + } +} + /// Treat api as containing the bits of a floating point number. Currently /// we infer the floating point type from the size of the APInt. The /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful /// when the size is anything else). void -APFloat::initFromAPInt(const APInt& api, bool isIEEE) +APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) { - if (api.getBitWidth() == 32) + if (Sem == &IEEEhalf) + return initFromHalfAPInt(api); + if (Sem == &IEEEsingle) return initFromFloatAPInt(api); - else if (api.getBitWidth()==64) + if (Sem == &IEEEdouble) return initFromDoubleAPInt(api); - else if (api.getBitWidth()==80) + if (Sem == &x87DoubleExtended) return initFromF80LongDoubleAPInt(api); - else if (api.getBitWidth()==128 && !isIEEE) + if (Sem == &IEEEquad) + return initFromQuadrupleAPInt(api); + if (Sem == &PPCDoubleDouble) return initFromPPCDoubleDoubleAPInt(api); - else - assert(0); + + llvm_unreachable(0); +} + +APFloat +APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) +{ + switch (BitWidth) { + case 16: + return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth)); + case 32: + return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth)); + case 64: + return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth)); + case 80: + return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth)); + case 128: + if (isIEEE) + return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth)); + return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth)); + default: + llvm_unreachable("Unknown floating bit width"); + } } -APFloat::APFloat(const APInt& api, bool isIEEE) -{ - initFromAPInt(api, isIEEE); +/// Make this number the largest magnitude normal number in the given +/// semantics. +void APFloat::makeLargest(bool Negative) { + // We want (in interchange format): + // sign = {Negative} + // exponent = 1..10 + // significand = 1..1 + category = fcNormal; + sign = Negative; + exponent = semantics->maxExponent; + + // Use memset to set all but the highest integerPart to all ones. + integerPart *significand = significandParts(); + unsigned PartCount = partCount(); + memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); + + // Set the high integerPart especially setting all unused top bits for + // internal consistency. + const unsigned NumUnusedHighBits = + PartCount*integerPartWidth - semantics->precision; + significand[PartCount - 1] = ~integerPart(0) >> NumUnusedHighBits; +} + +/// Make this number the smallest magnitude denormal number in the given +/// semantics. +void APFloat::makeSmallest(bool Negative) { + // We want (in interchange format): + // sign = {Negative} + // exponent = 0..0 + // significand = 0..01 + category = fcNormal; + sign = Negative; + exponent = semantics->minExponent; + APInt::tcSet(significandParts(), 1, partCount()); } -APFloat::APFloat(float f) -{ - APInt api = APInt(32, 0); - initFromAPInt(api.floatToBits(f)); + +APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { + // We want (in interchange format): + // sign = {Negative} + // exponent = 1..10 + // significand = 1..1 + APFloat Val(Sem, uninitialized); + Val.makeLargest(Negative); + return Val; } -APFloat::APFloat(double d) -{ - APInt api = APInt(64, 0); - initFromAPInt(api.doubleToBits(d)); +APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) { + // We want (in interchange format): + // sign = {Negative} + // exponent = 0..0 + // significand = 0..01 + APFloat Val(Sem, uninitialized); + Val.makeSmallest(Negative); + return Val; +} + +APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { + APFloat Val(Sem, uninitialized); + + // We want (in interchange format): + // sign = {Negative} + // exponent = 0..0 + // significand = 10..0 + + Val.zeroSignificand(); + Val.sign = Negative; + Val.exponent = Sem.minExponent; + Val.significandParts()[partCountForBits(Sem.precision)-1] |= + (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); + + return Val; +} + +APFloat::APFloat(const fltSemantics &Sem, const APInt &API) { + initFromAPInt(&Sem, API); +} + +APFloat::APFloat(float f) { + initFromAPInt(&IEEEsingle, APInt::floatToBits(f)); +} + +APFloat::APFloat(double d) { + initFromAPInt(&IEEEdouble, APInt::doubleToBits(d)); +} + +namespace { + void append(SmallVectorImpl &Buffer, StringRef Str) { + Buffer.append(Str.begin(), Str.end()); + } + + /// Removes data from the given significand until it is no more + /// precise than is required for the desired precision. + void AdjustToPrecision(APInt &significand, + int &exp, unsigned FormatPrecision) { + unsigned bits = significand.getActiveBits(); + + // 196/59 is a very slight overestimate of lg_2(10). + unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59; + + if (bits <= bitsRequired) return; + + unsigned tensRemovable = (bits - bitsRequired) * 59 / 196; + if (!tensRemovable) return; + + exp += tensRemovable; + + APInt divisor(significand.getBitWidth(), 1); + APInt powten(significand.getBitWidth(), 10); + while (true) { + if (tensRemovable & 1) + divisor *= powten; + tensRemovable >>= 1; + if (!tensRemovable) break; + powten *= powten; + } + + significand = significand.udiv(divisor); + + // Truncate the significand down to its active bit count. + significand = significand.trunc(significand.getActiveBits()); + } + + + void AdjustToPrecision(SmallVectorImpl &buffer, + int &exp, unsigned FormatPrecision) { + unsigned N = buffer.size(); + if (N <= FormatPrecision) return; + + // The most significant figures are the last ones in the buffer. + unsigned FirstSignificant = N - FormatPrecision; + + // Round. + // FIXME: this probably shouldn't use 'round half up'. + + // Rounding down is just a truncation, except we also want to drop + // trailing zeros from the new result. + if (buffer[FirstSignificant - 1] < '5') { + while (FirstSignificant < N && buffer[FirstSignificant] == '0') + FirstSignificant++; + + exp += FirstSignificant; + buffer.erase(&buffer[0], &buffer[FirstSignificant]); + return; + } + + // Rounding up requires a decimal add-with-carry. If we continue + // the carry, the newly-introduced zeros will just be truncated. + for (unsigned I = FirstSignificant; I != N; ++I) { + if (buffer[I] == '9') { + FirstSignificant++; + } else { + buffer[I]++; + break; + } + } + + // If we carried through, we have exactly one digit of precision. + if (FirstSignificant == N) { + exp += FirstSignificant; + buffer.clear(); + buffer.push_back('1'); + return; + } + + exp += FirstSignificant; + buffer.erase(&buffer[0], &buffer[FirstSignificant]); + } +} + +void APFloat::toString(SmallVectorImpl &Str, + unsigned FormatPrecision, + unsigned FormatMaxPadding) const { + switch (category) { + case fcInfinity: + if (isNegative()) + return append(Str, "-Inf"); + else + return append(Str, "+Inf"); + + case fcNaN: return append(Str, "NaN"); + + case fcZero: + if (isNegative()) + Str.push_back('-'); + + if (!FormatMaxPadding) + append(Str, "0.0E+0"); + else + Str.push_back('0'); + return; + + case fcNormal: + break; + } + + if (isNegative()) + Str.push_back('-'); + + // Decompose the number into an APInt and an exponent. + int exp = exponent - ((int) semantics->precision - 1); + APInt significand(semantics->precision, + makeArrayRef(significandParts(), + partCountForBits(semantics->precision))); + + // Set FormatPrecision if zero. We want to do this before we + // truncate trailing zeros, as those are part of the precision. + if (!FormatPrecision) { + // It's an interesting question whether to use the nominal + // precision or the active precision here for denormals. + + // FormatPrecision = ceil(significandBits / lg_2(10)) + FormatPrecision = (semantics->precision * 59 + 195) / 196; + } + + // Ignore trailing binary zeros. + int trailingZeros = significand.countTrailingZeros(); + exp += trailingZeros; + significand = significand.lshr(trailingZeros); + + // Change the exponent from 2^e to 10^e. + if (exp == 0) { + // Nothing to do. + } else if (exp > 0) { + // Just shift left. + significand = significand.zext(semantics->precision + exp); + significand <<= exp; + exp = 0; + } else { /* exp < 0 */ + int texp = -exp; + + // We transform this using the identity: + // (N)(2^-e) == (N)(5^e)(10^-e) + // This means we have to multiply N (the significand) by 5^e. + // To avoid overflow, we have to operate on numbers large + // enough to store N * 5^e: + // log2(N * 5^e) == log2(N) + e * log2(5) + // <= semantics->precision + e * 137 / 59 + // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59) + + unsigned precision = semantics->precision + (137 * texp + 136) / 59; + + // Multiply significand by 5^e. + // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) + significand = significand.zext(precision); + APInt five_to_the_i(precision, 5); + while (true) { + if (texp & 1) significand *= five_to_the_i; + + texp >>= 1; + if (!texp) break; + five_to_the_i *= five_to_the_i; + } + } + + AdjustToPrecision(significand, exp, FormatPrecision); + + SmallVector buffer; + + // Fill the buffer. + unsigned precision = significand.getBitWidth(); + APInt ten(precision, 10); + APInt digit(precision, 0); + + bool inTrail = true; + while (significand != 0) { + // digit <- significand % 10 + // significand <- significand / 10 + APInt::udivrem(significand, ten, significand, digit); + + unsigned d = digit.getZExtValue(); + + // Drop trailing zeros. + if (inTrail && !d) exp++; + else { + buffer.push_back((char) ('0' + d)); + inTrail = false; + } + } + + assert(!buffer.empty() && "no characters in buffer!"); + + // Drop down to FormatPrecision. + // TODO: don't do more precise calculations above than are required. + AdjustToPrecision(buffer, exp, FormatPrecision); + + unsigned NDigits = buffer.size(); + + // Check whether we should use scientific notation. + bool FormatScientific; + if (!FormatMaxPadding) + FormatScientific = true; + else { + if (exp >= 0) { + // 765e3 --> 765000 + // ^^^ + // But we shouldn't make the number look more precise than it is. + FormatScientific = ((unsigned) exp > FormatMaxPadding || + NDigits + (unsigned) exp > FormatPrecision); + } else { + // Power of the most significant digit. + int MSD = exp + (int) (NDigits - 1); + if (MSD >= 0) { + // 765e-2 == 7.65 + FormatScientific = false; + } else { + // 765e-5 == 0.00765 + // ^ ^^ + FormatScientific = ((unsigned) -MSD) > FormatMaxPadding; + } + } + } + + // Scientific formatting is pretty straightforward. + if (FormatScientific) { + exp += (NDigits - 1); + + Str.push_back(buffer[NDigits-1]); + Str.push_back('.'); + if (NDigits == 1) + Str.push_back('0'); + else + for (unsigned I = 1; I != NDigits; ++I) + Str.push_back(buffer[NDigits-1-I]); + Str.push_back('E'); + + Str.push_back(exp >= 0 ? '+' : '-'); + if (exp < 0) exp = -exp; + SmallVector expbuf; + do { + expbuf.push_back((char) ('0' + (exp % 10))); + exp /= 10; + } while (exp); + for (unsigned I = 0, E = expbuf.size(); I != E; ++I) + Str.push_back(expbuf[E-1-I]); + return; + } + + // Non-scientific, positive exponents. + if (exp >= 0) { + for (unsigned I = 0; I != NDigits; ++I) + Str.push_back(buffer[NDigits-1-I]); + for (unsigned I = 0; I != (unsigned) exp; ++I) + Str.push_back('0'); + return; + } + + // Non-scientific, negative exponents. + + // The number of digits to the left of the decimal point. + int NWholeDigits = exp + (int) NDigits; + + unsigned I = 0; + if (NWholeDigits > 0) { + for (; I != (unsigned) NWholeDigits; ++I) + Str.push_back(buffer[NDigits-I-1]); + Str.push_back('.'); + } else { + unsigned NZeros = 1 + (unsigned) -NWholeDigits; + + Str.push_back('0'); + Str.push_back('.'); + for (unsigned Z = 1; Z != NZeros; ++Z) + Str.push_back('0'); + } + + for (; I != NDigits; ++I) + Str.push_back(buffer[NDigits-I-1]); +} + +bool APFloat::getExactInverse(APFloat *inv) const { + // Special floats and denormals have no exact inverse. + if (!isFiniteNonZero()) + return false; + + // Check that the number is a power of two by making sure that only the + // integer bit is set in the significand. + if (significandLSB() != semantics->precision - 1) + return false; + + // Get the inverse. + APFloat reciprocal(*semantics, 1ULL); + if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) + return false; + + // Avoid multiplication with a denormal, it is not safe on all platforms and + // may be slower than a normal division. + if (reciprocal.isDenormal()) + return false; + + assert(reciprocal.isFiniteNonZero() && + reciprocal.significandLSB() == reciprocal.semantics->precision - 1); + + if (inv) + *inv = reciprocal; + + return true; +} + +bool APFloat::isSignaling() const { + if (!isNaN()) + return false; + + // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the + // first bit of the trailing significand being 0. + return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); +} + +/// IEEE-754R 2008 5.3.1: nextUp/nextDown. +/// +/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with +/// appropriate sign switching before/after the computation. +APFloat::opStatus APFloat::next(bool nextDown) { + // If we are performing nextDown, swap sign so we have -x. + if (nextDown) + changeSign(); + + // Compute nextUp(x) + opStatus result = opOK; + + // Handle each float category separately. + switch (category) { + case fcInfinity: + // nextUp(+inf) = +inf + if (!isNegative()) + break; + // nextUp(-inf) = -getLargest() + makeLargest(true); + break; + case fcNaN: + // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. + // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not + // change the payload. + if (isSignaling()) { + result = opInvalidOp; + // For consistency, propogate the sign of the sNaN to the qNaN. + makeNaN(false, isNegative(), 0); + } + break; + case fcZero: + // nextUp(pm 0) = +getSmallest() + makeSmallest(false); + break; + case fcNormal: + // nextUp(-getSmallest()) = -0 + if (isSmallest() && isNegative()) { + APInt::tcSet(significandParts(), 0, partCount()); + category = fcZero; + exponent = 0; + break; + } + + // nextUp(getLargest()) == INFINITY + if (isLargest() && !isNegative()) { + APInt::tcSet(significandParts(), 0, partCount()); + category = fcInfinity; + exponent = semantics->maxExponent + 1; + break; + } + + // nextUp(normal) == normal + inc. + if (isNegative()) { + // If we are negative, we need to decrement the significand. + + // We only cross a binade boundary that requires adjusting the exponent + // if: + // 1. exponent != semantics->minExponent. This implies we are not in the + // smallest binade or are dealing with denormals. + // 2. Our significand excluding the integral bit is all zeros. + bool WillCrossBinadeBoundary = + exponent != semantics->minExponent && isSignificandAllZeros(); + + // Decrement the significand. + // + // We always do this since: + // 1. If we are dealing with a non binade decrement, by definition we + // just decrement the significand. + // 2. If we are dealing with a normal -> normal binade decrement, since + // we have an explicit integral bit the fact that all bits but the + // integral bit are zero implies that subtracting one will yield a + // significand with 0 integral bit and 1 in all other spots. Thus we + // must just adjust the exponent and set the integral bit to 1. + // 3. If we are dealing with a normal -> denormal binade decrement, + // since we set the integral bit to 0 when we represent denormals, we + // just decrement the significand. + integerPart *Parts = significandParts(); + APInt::tcDecrement(Parts, partCount()); + + if (WillCrossBinadeBoundary) { + // Our result is a normal number. Do the following: + // 1. Set the integral bit to 1. + // 2. Decrement the exponent. + APInt::tcSetBit(Parts, semantics->precision - 1); + exponent--; + } + } else { + // If we are positive, we need to increment the significand. + + // We only cross a binade boundary that requires adjusting the exponent if + // the input is not a denormal and all of said input's significand bits + // are set. If all of said conditions are true: clear the significand, set + // the integral bit to 1, and increment the exponent. If we have a + // denormal always increment since moving denormals and the numbers in the + // smallest normal binade have the same exponent in our representation. + bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); + + if (WillCrossBinadeBoundary) { + integerPart *Parts = significandParts(); + APInt::tcSet(Parts, 0, partCount()); + APInt::tcSetBit(Parts, semantics->precision - 1); + assert(exponent != semantics->maxExponent && + "We can not increment an exponent beyond the maxExponent allowed" + " by the given floating point semantics."); + exponent++; + } else { + incrementSignificand(); + } + } + break; + } + + // If we are performing nextDown, swap sign so we have -nextUp(-x) + if (nextDown) + changeSign(); + + return result; +} + +void +APFloat::makeInf(bool Negative) { + category = fcInfinity; + sign = Negative; + exponent = semantics->maxExponent + 1; + APInt::tcSet(significandParts(), 0, partCount()); +} + +void +APFloat::makeZero(bool Negative) { + category = fcZero; + sign = Negative; + exponent = semantics->minExponent-1; + APInt::tcSet(significandParts(), 0, partCount()); }