X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FSupport%2FAPFloat.cpp;h=cc86e795e7927e6616b05b0441bd3a66f062a099;hb=e6be34a53ecbe8c2ff9f0793b13d847e94c0de91;hp=b61e6220a11201476e7d5c774a5001116c494b96;hpb=eaf089430e7681fcddc3465c3b33b9645273ab02;p=oota-llvm.git diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index b61e6220a11..cc86e795e79 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -2,8 +2,8 @@ // // The LLVM Compiler Infrastructure // -// This file was developed by Neil Booth and is distributed under the -// University of Illinois Open Source License. See LICENSE.TXT for details. +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // @@ -12,15 +12,18 @@ // //===----------------------------------------------------------------------===// -#include #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/FoldingSet.h" +#include +#include #include "llvm/Support/MathExtras.h" using namespace llvm; #define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) -/* Assumed in hexadecimal significand parsing. */ +/* Assumed in hexadecimal significand parsing, and conversion to + hexadecimal strings. */ COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0); namespace llvm { @@ -37,17 +40,39 @@ namespace llvm { /* Number of bits in the significand. This includes the integer bit. */ - unsigned char precision; + unsigned int precision; - /* If the target format has an implicit integer bit. */ - bool implicitIntegerBit; + /* True if arithmetic is supported. */ + unsigned int arithmeticOK; }; const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; - const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, false }; - const fltSemantics APFloat::Bogus = { 0, 0, 0, false }; + const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true }; + const fltSemantics APFloat::Bogus = { 0, 0, 0, true }; + + // The PowerPC format consists of two doubles. It does not map cleanly + // onto the usual format above. For now only storage of constants of + // this type is supported, no arithmetic. + const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false }; + + /* A tight upper bound on number of parts required to hold the value + pow(5, power) is + + power * 815 / (351 * integerPartWidth) + 1 + + However, whilst the result may require only this many parts, + because we are multiplying two values to get it, the + multiplication may require an extra part with the excess part + being zero (consider the trivial case of 1 * 1, tcFullMultiply + requires two parts to hold the single-part result). So we add an + extra one to guarantee enough space whilst multiplying. */ + const unsigned int maxExponent = 16383; + const unsigned int maxPrecision = 113; + const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; + const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) + / (351 * integerPartWidth)); } /* Put a bunch of private, handy routines in an anonymous namespace. */ @@ -59,20 +84,15 @@ namespace { return ((bits) + integerPartWidth - 1) / integerPartWidth; } - unsigned int - digitValue(unsigned int c) + /* Returns 0U-9U. Return values >= 10U are not digits. */ + inline unsigned int + decDigitValue(unsigned int c) { - unsigned int r; - - r = c - '0'; - if(r <= 9) - return r; - - return -1U; + return c - '0'; } unsigned int - hexDigitValue (unsigned int c) + hexDigitValue(unsigned int c) { unsigned int r; @@ -91,9 +111,56 @@ namespace { return -1U; } + inline void + assertArithmeticOK(const llvm::fltSemantics &semantics) { + assert(semantics.arithmeticOK + && "Compile-time arithmetic does not support these semantics"); + } + + /* Return the value of a decimal exponent of the form + [+-]ddddddd. + + If the exponent overflows, returns a large exponent with the + appropriate sign. */ + int + readExponent(const char *p) + { + bool isNegative; + unsigned int absExponent; + const unsigned int overlargeExponent = 24000; /* FIXME. */ + + isNegative = (*p == '-'); + if (*p == '-' || *p == '+') + p++; + + absExponent = decDigitValue(*p++); + assert (absExponent < 10U); + + for (;;) { + unsigned int value; + + value = decDigitValue(*p); + if (value >= 10U) + break; + + p++; + value += absExponent * 10; + if (absExponent >= overlargeExponent) { + absExponent = overlargeExponent; + break; + } + absExponent = value; + } + + if (isNegative) + return -(int) absExponent; + else + return (int) absExponent; + } + /* This is ugly and needs cleaning up, but I don't immediately see how whilst remaining safe. */ - static int + int totalExponent(const char *p, int exponentAdjustment) { integerPart unsignedExponent; @@ -111,14 +178,14 @@ namespace { for(;;) { unsigned int value; - value = digitValue(*p); - if(value == -1U) - break; + value = decDigitValue(*p); + if(value >= 10U) + break; p++; unsignedExponent = unsignedExponent * 10 + value; if(unsignedExponent > 65535) - overflow = true; + overflow = true; } if(exponentAdjustment > 65535 || exponentAdjustment < -65536) @@ -127,10 +194,10 @@ namespace { if(!overflow) { exponent = unsignedExponent; if(negative) - exponent = -exponent; + exponent = -exponent; exponent += exponentAdjustment; if(exponent > 65535 || exponent < -65536) - overflow = true; + overflow = true; } if(overflow) @@ -149,12 +216,78 @@ namespace { if(*p == '.') { *dot = p++; while(*p == '0') - p++; + p++; } return p; } + /* Given a normal decimal floating point number of the form + + dddd.dddd[eE][+-]ddd + + where the decimal point and exponent are optional, fill out the + structure D. Exponent is appropriate if the significand is + treated as an integer, and normalizedExponent if the significand + is taken to have the decimal point after a single leading + non-zero digit. + + If the value is zero, V->firstSigDigit points to a non-digit, and + the return exponent is zero. + */ + struct decimalInfo { + const char *firstSigDigit; + const char *lastSigDigit; + int exponent; + int normalizedExponent; + }; + + void + interpretDecimal(const char *p, decimalInfo *D) + { + const char *dot; + + p = skipLeadingZeroesAndAnyDot (p, &dot); + + D->firstSigDigit = p; + D->exponent = 0; + D->normalizedExponent = 0; + + for (;;) { + if (*p == '.') { + assert(dot == 0); + dot = p++; + } + if (decDigitValue(*p) >= 10U) + break; + p++; + } + + /* If number is all zerooes accept any exponent. */ + if (p != D->firstSigDigit) { + if (*p == 'e' || *p == 'E') + D->exponent = readExponent(p + 1); + + /* Implied decimal point? */ + if (!dot) + dot = p; + + /* Drop insignificant trailing zeroes. */ + do + do + p--; + while (*p == '0'); + while (*p == '.'); + + /* Adjust the exponents for any decimal point. */ + D->exponent += (dot - p) - (dot > p); + D->normalizedExponent = (D->exponent + (p - D->firstSigDigit) + - (dot > D->firstSigDigit && dot < p)); + } + + D->lastSigDigit = p; + } + /* Return the trailing fraction of a hexadecimal number. DIGITVALUE is the first hex digit of the fraction, P points to the next digit. */ @@ -184,11 +317,12 @@ namespace { return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; } - /* Return the fraction lost were a bignum truncated. */ + /* Return the fraction lost were a bignum truncated losing the least + significant BITS bits. */ lostFraction - lostFractionThroughTruncation(integerPart *parts, - unsigned int partCount, - unsigned int bits) + lostFractionThroughTruncation(const integerPart *parts, + unsigned int partCount, + unsigned int bits) { unsigned int lsb; @@ -218,6 +352,206 @@ namespace { return lost_fraction; } + + /* Combine the effect of two lost fractions. */ + lostFraction + combineLostFractions(lostFraction moreSignificant, + lostFraction lessSignificant) + { + if(lessSignificant != lfExactlyZero) { + if(moreSignificant == lfExactlyZero) + moreSignificant = lfLessThanHalf; + else if(moreSignificant == lfExactlyHalf) + moreSignificant = lfMoreThanHalf; + } + + return moreSignificant; + } + + /* The error from the true value, in half-ulps, on multiplying two + floating point numbers, which differ from the value they + approximate by at most HUE1 and HUE2 half-ulps, is strictly less + than the returned value. + + See "How to Read Floating Point Numbers Accurately" by William D + Clinger. */ + unsigned int + HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) + { + assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); + + if (HUerr1 + HUerr2 == 0) + return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ + else + return inexactMultiply + 2 * (HUerr1 + HUerr2); + } + + /* The number of ulps from the boundary (zero, or half if ISNEAREST) + when the least significant BITS are truncated. BITS cannot be + zero. */ + integerPart + ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) + { + unsigned int count, partBits; + integerPart part, boundary; + + assert (bits != 0); + + bits--; + count = bits / integerPartWidth; + partBits = bits % integerPartWidth + 1; + + part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); + + if (isNearest) + boundary = (integerPart) 1 << (partBits - 1); + else + boundary = 0; + + if (count == 0) { + if (part - boundary <= boundary - part) + return part - boundary; + else + return boundary - part; + } + + if (part == boundary) { + while (--count) + if (parts[count]) + return ~(integerPart) 0; /* A lot. */ + + return parts[0]; + } else if (part == boundary - 1) { + while (--count) + if (~parts[count]) + return ~(integerPart) 0; /* A lot. */ + + return -parts[0]; + } + + return ~(integerPart) 0; /* A lot. */ + } + + /* Place pow(5, power) in DST, and return the number of parts used. + DST must be at least one part larger than size of the answer. */ + unsigned int + powerOf5(integerPart *dst, unsigned int power) + { + static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, + 15625, 78125 }; + static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 }; + static unsigned int partsCount[16] = { 1 }; + + integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; + unsigned int result; + + assert(power <= maxExponent); + + p1 = dst; + p2 = scratch; + + *p1 = firstEightPowers[power & 7]; + power >>= 3; + + result = 1; + pow5 = pow5s; + + for (unsigned int n = 0; power; power >>= 1, n++) { + unsigned int pc; + + pc = partsCount[n]; + + /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ + if (pc == 0) { + pc = partsCount[n - 1]; + APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); + pc *= 2; + if (pow5[pc - 1] == 0) + pc--; + partsCount[n] = pc; + } + + if (power & 1) { + integerPart *tmp; + + APInt::tcFullMultiply(p2, p1, pow5, result, pc); + result += pc; + if (p2[result - 1] == 0) + result--; + + /* Now result is in p1 with partsCount parts and p2 is scratch + space. */ + tmp = p1, p1 = p2, p2 = tmp; + } + + pow5 += pc; + } + + if (p1 != dst) + APInt::tcAssign(dst, p1, result); + + return result; + } + + /* Zero at the end to avoid modular arithmetic when adding one; used + when rounding up during hexadecimal output. */ + static const char hexDigitsLower[] = "0123456789abcdef0"; + static const char hexDigitsUpper[] = "0123456789ABCDEF0"; + static const char infinityL[] = "infinity"; + static const char infinityU[] = "INFINITY"; + static const char NaNL[] = "nan"; + static const char NaNU[] = "NAN"; + + /* Write out an integerPart in hexadecimal, starting with the most + significant nibble. Write out exactly COUNT hexdigits, return + COUNT. */ + unsigned int + partAsHex (char *dst, integerPart part, unsigned int count, + const char *hexDigitChars) + { + unsigned int result = count; + + assert (count != 0 && count <= integerPartWidth / 4); + + part >>= (integerPartWidth - 4 * count); + while (count--) { + dst[count] = hexDigitChars[part & 0xf]; + part >>= 4; + } + + return result; + } + + /* Write out an unsigned decimal integer. */ + char * + writeUnsignedDecimal (char *dst, unsigned int n) + { + char buff[40], *p; + + p = buff; + do + *p++ = '0' + n % 10; + while (n /= 10); + + do + *dst++ = *--p; + while (p != buff); + + return dst; + } + + /* Write out a signed decimal integer. */ + char * + writeSignedDecimal (char *dst, int value) + { + if (value < 0) { + *dst++ = '-'; + dst = writeUnsignedDecimal(dst, -(unsigned) value); + } else + dst = writeUnsignedDecimal(dst, value); + + return dst; + } } /* Constructors. */ @@ -247,6 +581,8 @@ APFloat::assign(const APFloat &rhs) sign = rhs.sign; category = rhs.category; exponent = rhs.exponent; + sign2 = rhs.sign2; + exponent2 = rhs.exponent2; if(category == fcNormal || category == fcNaN) copySignificand(rhs); } @@ -258,7 +594,16 @@ APFloat::copySignificand(const APFloat &rhs) assert(rhs.partCount() >= partCount()); APInt::tcAssign(significandParts(), rhs.significandParts(), - partCount()); + partCount()); +} + +/* Make this number a NaN, with an arbitrary but deterministic value + for the significand. */ +void +APFloat::makeNaN(void) +{ + category = fcNaN; + APInt::tcSet(significandParts(), ~0U, partCount()); } APFloat & @@ -283,10 +628,16 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { category != rhs.category || sign != rhs.sign) return false; + if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && + sign2 != rhs.sign2) + return false; if (category==fcZero || category==fcInfinity) return true; else if (category==fcNormal && exponent!=rhs.exponent) return false; + else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && + exponent2!=rhs.exponent2) + return false; else { int i= partCount(); const integerPart* p=significandParts(); @@ -301,6 +652,7 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { + assertArithmeticOK(ourSemantics); initialize(&ourSemantics); sign = 0; zeroSignificand(); @@ -310,17 +662,21 @@ APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) } APFloat::APFloat(const fltSemantics &ourSemantics, - fltCategory ourCategory, bool negative) + fltCategory ourCategory, bool negative) { + assertArithmeticOK(ourSemantics); initialize(&ourSemantics); category = ourCategory; sign = negative; if(category == fcNormal) category = fcZero; + else if (ourCategory == fcNaN) + makeNaN(); } APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) { + assertArithmeticOK(ourSemantics); initialize(&ourSemantics); convertFromString(text, rmNearestTiesToEven); } @@ -336,6 +692,11 @@ APFloat::~APFloat() freeSignificand(); } +// Profile - This method 'profiles' an APFloat for use with FoldingSet. +void APFloat::Profile(FoldingSetNodeID& ID) const { + ID.Add(convertToAPInt()); +} + unsigned int APFloat::partCount() const { @@ -365,21 +726,6 @@ APFloat::significandParts() return &significand.part; } -/* Combine the effect of two lost fractions. */ -lostFraction -APFloat::combineLostFractions(lostFraction moreSignificant, - lostFraction lessSignificant) -{ - if(lessSignificant != lfExactlyZero) { - if(moreSignificant == lfExactlyZero) - moreSignificant = lfLessThanHalf; - else if(moreSignificant == lfExactlyHalf) - moreSignificant = lfMoreThanHalf; - } - - return moreSignificant; -} - void APFloat::zeroSignificand() { @@ -426,7 +772,7 @@ APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) assert(exponent == rhs.exponent); return APInt::tcSubtract(parts, rhs.significandParts(), borrow, - partCount()); + partCount()); } /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it @@ -435,7 +781,7 @@ APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow) lostFraction APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) { - unsigned int omsb; // One, not zero, based MSB. + unsigned int omsb; // One, not zero, based MSB. unsigned int partsCount, newPartsCount, precision; integerPart *lhsSignificand; integerPart scratch[4]; @@ -456,7 +802,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) partsCount = partCount(); APInt::tcFullMultiply(fullSignificand, lhsSignificand, - rhs.significandParts(), partsCount); + rhs.significandParts(), partsCount, partsCount); lost_fraction = lfExactlyZero; omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; @@ -473,9 +819,9 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) extendedPrecision = precision + precision - 1; if(omsb != extendedPrecision) { - APInt::tcShiftLeft(fullSignificand, newPartsCount, - extendedPrecision - omsb); - exponent -= extendedPrecision - omsb; + APInt::tcShiftLeft(fullSignificand, newPartsCount, + extendedPrecision - omsb); + exponent -= extendedPrecision - omsb; } /* Create new semantics. */ @@ -571,6 +917,9 @@ APFloat::divideSignificand(const APFloat &rhs) APInt::tcShiftLeft(dividend, partsCount, bit); } + /* Ensure the dividend >= divisor initially for the loop below. + Incidentally, this means that the division loop below is + guaranteed to set the integer bit to one. */ if(APInt::tcCompare(dividend, divisor, partsCount) < 0) { exponent--; APInt::tcShiftLeft(dividend, partsCount, 1); @@ -660,7 +1009,7 @@ APFloat::compareAbsoluteValue(const APFloat &rhs) const significands. */ if(compare == 0) compare = APInt::tcCompare(significandParts(), rhs.significandParts(), - partCount()); + partCount()); if(compare > 0) return cmpGreaterThan; @@ -689,21 +1038,25 @@ APFloat::handleOverflow(roundingMode rounding_mode) category = fcNormal; exponent = semantics->maxExponent; APInt::tcSetLeastSignificantBits(significandParts(), partCount(), - semantics->precision); + semantics->precision); return opInexact; } -/* This routine must work for fcZero of both signs, and fcNormal - numbers. */ +/* Returns TRUE if, when truncating the current number, with BIT the + new LSB, with the given lost fraction and rounding mode, the result + would need to be rounded away from zero (i.e., by increasing the + signficand). This routine must work for fcZero of both signs, and + fcNormal numbers. */ bool APFloat::roundAwayFromZero(roundingMode rounding_mode, - lostFraction lost_fraction) + lostFraction lost_fraction, + unsigned int bit) const { /* NaNs and infinities should not have lost fractions. */ assert(category == fcNormal || category == fcZero); - /* Our caller has already handled this case. */ + /* Current callers never pass this so we don't handle it. */ assert(lost_fraction != lfExactlyZero); switch(rounding_mode) { @@ -719,7 +1072,7 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode, /* Our zeroes don't have a significand to test. */ if(lost_fraction == lfExactlyHalf && category != fcZero) - return significandParts()[0] & 1; + return APInt::tcExtractBit(significandParts(), bit); return false; @@ -736,9 +1089,9 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode, APFloat::opStatus APFloat::normalize(roundingMode rounding_mode, - lostFraction lost_fraction) + lostFraction lost_fraction) { - unsigned int omsb; /* One, not zero, based MSB. */ + unsigned int omsb; /* One, not zero, based MSB. */ int exponentChange; if(category != fcNormal) @@ -782,9 +1135,9 @@ APFloat::normalize(roundingMode rounding_mode, /* Keep OMSB up-to-date. */ if(omsb > (unsigned) exponentChange) - omsb -= (unsigned) exponentChange; + omsb -= exponentChange; else - omsb = 0; + omsb = 0; } } @@ -802,7 +1155,7 @@ APFloat::normalize(roundingMode rounding_mode, } /* Increment the significand if we're rounding away from zero. */ - if(roundAwayFromZero(rounding_mode, lost_fraction)) { + if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) { if(omsb == 0) exponent = semantics->minExponent; @@ -812,12 +1165,12 @@ APFloat::normalize(roundingMode rounding_mode, /* Did the significand increment overflow? */ if(omsb == (unsigned) semantics->precision + 1) { /* Renormalize by incrementing the exponent and shifting our - significand right one. However if we already have the - maximum exponent we overflow to infinity. */ + significand right one. However if we already have the + maximum exponent we overflow to infinity. */ if(exponent == semantics->maxExponent) { - category = fcInfinity; + category = fcInfinity; - return (opStatus) (opOverflow | opInexact); + return (opStatus) (opOverflow | opInexact); } shiftSignificandRight(1); @@ -833,7 +1186,6 @@ APFloat::normalize(roundingMode rounding_mode, /* We have a non-zero denormal. */ assert(omsb < semantics->precision); - assert(exponent == semantics->minExponent); /* Canonicalize zeroes. */ if(omsb == 0) @@ -884,10 +1236,8 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) case convolve(fcInfinity, fcInfinity): /* Differently signed infinities can only be validly subtracted. */ - if(sign ^ rhs.sign != subtract) { - category = fcNaN; - // Arbitrary but deterministic value for significand - APInt::tcSet(significandParts(), ~0U, partCount()); + if((sign ^ rhs.sign) != subtract) { + makeNaN(); return opInvalidOp; } @@ -908,7 +1258,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) /* Determine if the operation on the absolute values is effectively an addition or subtraction. */ - subtract ^= (sign ^ rhs.sign); + subtract ^= (sign ^ rhs.sign) ? true : false; /* Are we bigger exponent-wise than the RHS? */ bits = exponent - rhs.exponent; @@ -933,12 +1283,12 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) if (reverse) { carry = temp_rhs.subtractSignificand - (*this, lost_fraction != lfExactlyZero); + (*this, lost_fraction != lfExactlyZero); copySignificand(temp_rhs); sign = !sign; } else { carry = subtractSignificand - (temp_rhs, lost_fraction != lfExactlyZero); + (temp_rhs, lost_fraction != lfExactlyZero); } /* Invert the lost fraction - it was on the RHS and @@ -1003,9 +1353,7 @@ APFloat::multiplySpecials(const APFloat &rhs) case convolve(fcZero, fcInfinity): case convolve(fcInfinity, fcZero): - category = fcNaN; - // Arbitrary but deterministic value for significand - APInt::tcSet(significandParts(), ~0U, partCount()); + makeNaN(); return opInvalidOp; case convolve(fcNormal, fcNormal): @@ -1047,9 +1395,7 @@ APFloat::divideSpecials(const APFloat &rhs) case convolve(fcInfinity, fcInfinity): case convolve(fcZero, fcZero): - category = fcNaN; - // Arbitrary but deterministic value for significand - APInt::tcSet(significandParts(), ~0U, partCount()); + makeNaN(); return opInvalidOp; case convolve(fcNormal, fcNormal): @@ -1065,13 +1411,29 @@ APFloat::changeSign() sign = !sign; } +void +APFloat::clearSign() +{ + /* So is this one. */ + sign = 0; +} + +void +APFloat::copySign(const APFloat &rhs) +{ + /* And this one. */ + sign = rhs.sign; +} + /* Normalized addition or subtraction. */ APFloat::opStatus APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, - bool subtract) + bool subtract) { opStatus fs; + assertArithmeticOK(*semantics); + fs = addOrSubtractSpecials(rhs, subtract); /* This return code means it was not a simple case. */ @@ -1116,6 +1478,7 @@ APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) { opStatus fs; + assertArithmeticOK(*semantics); sign ^= rhs.sign; fs = multiplySpecials(rhs); @@ -1135,6 +1498,7 @@ APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) { opStatus fs; + assertArithmeticOK(*semantics); sign ^= rhs.sign; fs = divideSpecials(rhs); @@ -1148,14 +1512,52 @@ APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) return fs; } +/* Normalized remainder. This is not currently doing TRT. */ +APFloat::opStatus +APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) +{ + opStatus fs; + APFloat V = *this; + unsigned int origSign = sign; + + assertArithmeticOK(*semantics); + fs = V.divide(rhs, rmNearestTiesToEven); + if (fs == opDivByZero) + return fs; + + int parts = partCount(); + integerPart *x = new integerPart[parts]; + fs = V.convertToInteger(x, parts * integerPartWidth, true, + rmNearestTiesToEven); + if (fs==opInvalidOp) + return fs; + + fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, + rmNearestTiesToEven); + assert(fs==opOK); // should always work + + fs = V.multiply(rhs, rounding_mode); + assert(fs==opOK || fs==opInexact); // should not overflow or underflow + + fs = subtract(V, rounding_mode); + assert(fs==opOK || fs==opInexact); // likewise + + if (isZero()) + sign = origSign; // IEEE754 requires this + delete[] x; + return fs; +} + /* Normalized fused-multiply-add. */ APFloat::opStatus APFloat::fusedMultiplyAdd(const APFloat &multiplicand, - const APFloat &addend, - roundingMode rounding_mode) + const APFloat &addend, + roundingMode rounding_mode) { opStatus fs; + assertArithmeticOK(*semantics); + /* Post-multiplication sign, before addition. */ sign ^= multiplicand.sign; @@ -1199,6 +1601,7 @@ APFloat::compare(const APFloat &rhs) const { cmpResult result; + assertArithmeticOK(*semantics); assert(semantics == rhs.semantics); switch(convolve(category, rhs.category)) { @@ -1257,9 +1660,9 @@ APFloat::compare(const APFloat &rhs) const if(sign) { if(result == cmpLessThan) - result = cmpGreaterThan; + result = cmpGreaterThan; else if(result == cmpGreaterThan) - result = cmpLessThan; + result = cmpLessThan; } } @@ -1268,30 +1671,65 @@ APFloat::compare(const APFloat &rhs) const APFloat::opStatus APFloat::convert(const fltSemantics &toSemantics, - roundingMode rounding_mode) + roundingMode rounding_mode) { - unsigned int newPartCount; + lostFraction lostFraction; + unsigned int newPartCount, oldPartCount; opStatus fs; + assertArithmeticOK(*semantics); + lostFraction = lfExactlyZero; newPartCount = partCountForBits(toSemantics.precision + 1); - - /* If our new form is wider, re-allocate our bit pattern into wider - storage. */ - if(newPartCount > partCount()) { + oldPartCount = partCount(); + + /* Handle storage complications. If our new form is wider, + re-allocate our bit pattern into wider storage. If it is + narrower, we ignore the excess parts, but if narrowing to a + single part we need to free the old storage. + Be careful not to reference significandParts for zeroes + and infinities, since it aborts. */ + if (newPartCount > oldPartCount) { integerPart *newParts; - newParts = new integerPart[newPartCount]; APInt::tcSet(newParts, 0, newPartCount); - APInt::tcAssign(newParts, significandParts(), partCount()); + if (category==fcNormal || category==fcNaN) + APInt::tcAssign(newParts, significandParts(), oldPartCount); freeSignificand(); significand.parts = newParts; + } else if (newPartCount < oldPartCount) { + /* Capture any lost fraction through truncation of parts so we get + correct rounding whilst normalizing. */ + if (category==fcNormal) + lostFraction = lostFractionThroughTruncation + (significandParts(), oldPartCount, toSemantics.precision); + if (newPartCount == 1) { + integerPart newPart = 0; + if (category==fcNormal || category==fcNaN) + newPart = significandParts()[0]; + freeSignificand(); + significand.part = newPart; + } } if(category == fcNormal) { /* Re-interpret our bit-pattern. */ exponent += toSemantics.precision - semantics->precision; semantics = &toSemantics; - fs = normalize(rounding_mode, lfExactlyZero); + fs = normalize(rounding_mode, lostFraction); + } else if (category == fcNaN) { + int shift = toSemantics.precision - semantics->precision; + // Do this now so significandParts gets the right answer + semantics = &toSemantics; + // No normalization here, just truncate + if (shift>0) + APInt::tcShiftLeft(significandParts(), newPartCount, shift); + else if (shift < 0) + APInt::tcShiftRight(significandParts(), newPartCount, -shift); + // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) + // does not give you back the same bits. This is dubious, and we + // don't currently do it. You're really supposed to get + // an invalid operation signal at runtime, but nobody does that. + fs = opOK; } else { semantics = &toSemantics; fs = opOK; @@ -1302,7 +1740,8 @@ APFloat::convert(const fltSemantics &toSemantics, /* Convert a floating point number to an integer according to the rounding mode. If the rounded integer value is out of range this - returns an invalid operation exception. If the rounded value is in + returns an invalid operation exception and the contents of the + destination parts are unspecified. If the rounded value is in range but the floating point number is not the exact integer, the C standard doesn't require an inexact exception to be raised. IEEE 854 does require it so we do that. @@ -1310,125 +1749,222 @@ APFloat::convert(const fltSemantics &toSemantics, Note that for conversions to integer type the C standard requires round-to-zero to always be used. */ APFloat::opStatus -APFloat::convertToInteger(integerPart *parts, unsigned int width, - bool isSigned, - roundingMode rounding_mode) const +APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, + bool isSigned, + roundingMode rounding_mode) const { lostFraction lost_fraction; - unsigned int msb, partsCount; - int bits; + const integerPart *src; + unsigned int dstPartsCount, truncatedBits; + + assertArithmeticOK(*semantics); /* Handle the three special cases first. */ if(category == fcInfinity || category == fcNaN) return opInvalidOp; - partsCount = partCountForBits(width); + dstPartsCount = partCountForBits(width); if(category == fcZero) { - APInt::tcSet(parts, 0, partsCount); + APInt::tcSet(parts, 0, dstPartsCount); return opOK; } - /* Shift the bit pattern so the fraction is lost. */ - APFloat tmp(*this); + src = significandParts(); + + /* Step 1: place our absolute value, with any fraction truncated, in + the destination. */ + if (exponent < 0) { + /* Our absolute value is less than one; truncate everything. */ + APInt::tcSet(parts, 0, dstPartsCount); + truncatedBits = semantics->precision; + } else { + /* We want the most significant (exponent + 1) bits; the rest are + truncated. */ + unsigned int bits = exponent + 1U; - bits = (int) semantics->precision - 1 - exponent; + /* Hopelessly large in magnitude? */ + if (bits > width) + return opInvalidOp; - if(bits > 0) { - lost_fraction = tmp.shiftSignificandRight(bits); + if (bits < semantics->precision) { + /* We truncate (semantics->precision - bits) bits. */ + truncatedBits = semantics->precision - bits; + APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); + } else { + /* We want at least as many bits as are available. */ + APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); + APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); + truncatedBits = 0; + } + } + + /* Step 2: work out any lost fraction, and increment the absolute + value if we would round away from zero. */ + if (truncatedBits) { + lost_fraction = lostFractionThroughTruncation(src, partCount(), + truncatedBits); + if (lost_fraction != lfExactlyZero + && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { + if (APInt::tcIncrement(parts, dstPartsCount)) + return opInvalidOp; /* Overflow. */ + } } else { - tmp.shiftSignificandLeft(-bits); lost_fraction = lfExactlyZero; } - if(lost_fraction != lfExactlyZero - && tmp.roundAwayFromZero(rounding_mode, lost_fraction)) - tmp.incrementSignificand(); + /* Step 3: check if we fit in the destination. */ + unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; - msb = tmp.significandMSB(); + if (sign) { + if (!isSigned) { + /* Negative numbers cannot be represented as unsigned. */ + if (omsb != 0) + return opInvalidOp; + } else { + /* It takes omsb bits to represent the unsigned integer value. + We lose a bit for the sign, but care is needed as the + maximally negative integer is a special case. */ + if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) + return opInvalidOp; + + /* This case can happen because of rounding. */ + if (omsb > width) + return opInvalidOp; + } - /* Negative numbers cannot be represented as unsigned. */ - if(!isSigned && tmp.sign && msb != -1U) - return opInvalidOp; + APInt::tcNegate (parts, dstPartsCount); + } else { + if (omsb >= width + !isSigned) + return opInvalidOp; + } - /* It takes exponent + 1 bits to represent the truncated floating - point number without its sign. We lose a bit for the sign, but - the maximally negative integer is a special case. */ - if(msb + 1 > width) /* !! Not same as msb >= width !! */ - return opInvalidOp; + if (lost_fraction == lfExactlyZero) + return opOK; + else + return opInexact; +} - if(isSigned && msb + 1 == width - && (!tmp.sign || tmp.significandLSB() != msb)) - return opInvalidOp; +/* Same as convertToSignExtendedInteger, except we provide + deterministic values in case of an invalid operation exception, + namely zero for NaNs and the minimal or maximal value respectively + for underflow or overflow. */ +APFloat::opStatus +APFloat::convertToInteger(integerPart *parts, unsigned int width, + bool isSigned, + roundingMode rounding_mode) const +{ + opStatus fs; - APInt::tcAssign(parts, tmp.significandParts(), partsCount); + fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode); - if(tmp.sign) - APInt::tcNegate(parts, partsCount); + if (fs == opInvalidOp) { + unsigned int bits, dstPartsCount; - if(lost_fraction == lfExactlyZero) - return opOK; - else - return opInexact; + dstPartsCount = partCountForBits(width); + + if (category == fcNaN) + bits = 0; + else if (sign) + bits = isSigned; + else + bits = width - isSigned; + + APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); + if (sign && isSigned) + APInt::tcShiftLeft(parts, dstPartsCount, width - 1); + } + + return fs; } +/* Convert an unsigned integer SRC to a floating point number, + rounding according to ROUNDING_MODE. The sign of the floating + point number is not modified. */ APFloat::opStatus -APFloat::convertFromUnsignedInteger(integerPart *parts, - unsigned int partCount, - roundingMode rounding_mode) +APFloat::convertFromUnsignedParts(const integerPart *src, + unsigned int srcCount, + roundingMode rounding_mode) { - unsigned int msb, precision; + unsigned int omsb, precision, dstCount; + integerPart *dst; lostFraction lost_fraction; - msb = APInt::tcMSB(parts, partCount) + 1; - precision = semantics->precision; - + assertArithmeticOK(*semantics); category = fcNormal; - exponent = precision - 1; + omsb = APInt::tcMSB(src, srcCount) + 1; + dst = significandParts(); + dstCount = partCount(); + precision = semantics->precision; - if(msb > precision) { - exponent += (msb - precision); - lost_fraction = shiftRight(parts, partCount, msb - precision); - msb = precision; - } else + /* We want the most significant PRECISON bits of SRC. There may not + be that many; extract what we can. */ + if (precision <= omsb) { + exponent = omsb - 1; + lost_fraction = lostFractionThroughTruncation(src, srcCount, + omsb - precision); + APInt::tcExtract(dst, dstCount, src, precision, omsb - precision); + } else { + exponent = precision - 1; lost_fraction = lfExactlyZero; - - /* Copy the bit image. */ - zeroSignificand(); - APInt::tcAssign(significandParts(), parts, partCountForBits(msb)); + APInt::tcExtract(dst, dstCount, src, omsb, 0); + } return normalize(rounding_mode, lost_fraction); } +/* Convert a two's complement integer SRC to a floating point number, + rounding according to ROUNDING_MODE. ISSIGNED is true if the + integer is signed, in which case it must be sign-extended. */ APFloat::opStatus -APFloat::convertFromInteger(const integerPart *parts, - unsigned int partCount, bool isSigned, - roundingMode rounding_mode) +APFloat::convertFromSignExtendedInteger(const integerPart *src, + unsigned int srcCount, + bool isSigned, + roundingMode rounding_mode) { - unsigned int width; opStatus status; - integerPart *copy; - copy = new integerPart[partCount]; - APInt::tcAssign(copy, parts, partCount); + assertArithmeticOK(*semantics); + if (isSigned + && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { + integerPart *copy; + + /* If we're signed and negative negate a copy. */ + sign = true; + copy = new integerPart[srcCount]; + APInt::tcAssign(copy, src, srcCount); + APInt::tcNegate(copy, srcCount); + status = convertFromUnsignedParts(copy, srcCount, rounding_mode); + delete [] copy; + } else { + sign = false; + status = convertFromUnsignedParts(src, srcCount, rounding_mode); + } + + return status; +} - width = partCount * integerPartWidth; +/* FIXME: should this just take a const APInt reference? */ +APFloat::opStatus +APFloat::convertFromZeroExtendedInteger(const integerPart *parts, + unsigned int width, bool isSigned, + roundingMode rounding_mode) +{ + unsigned int partCount = partCountForBits(width); + APInt api = APInt(width, partCount, parts); sign = false; if(isSigned && APInt::tcExtractBit(parts, width - 1)) { sign = true; - APInt::tcNegate(copy, partCount); + api = -api; } - status = convertFromUnsignedInteger(copy, partCount, rounding_mode); - delete [] copy; - - return status; + return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode); } APFloat::opStatus APFloat::convertFromHexadecimalString(const char *p, - roundingMode rounding_mode) + roundingMode rounding_mode) { lostFraction lost_fraction; integerPart *significand; @@ -1443,7 +1979,7 @@ APFloat::convertFromHexadecimalString(const char *p, partsCount = partCount(); bitPos = partsCount * integerPartWidth; - /* Skip leading zeroes and any(hexa)decimal point. */ + /* Skip leading zeroes and any (hexa)decimal point. */ p = skipLeadingZeroesAndAnyDot(p, &dot); firstSignificantDigit = p; @@ -1471,7 +2007,7 @@ APFloat::convertFromHexadecimalString(const char *p, } else { lost_fraction = trailingHexadecimalFraction(p, hex_value); while(hexDigitValue(*p) != -1U) - p++; + p++; break; } } @@ -1507,7 +2043,186 @@ APFloat::convertFromHexadecimalString(const char *p, } APFloat::opStatus -APFloat::convertFromString(const char *p, roundingMode rounding_mode) { +APFloat::roundSignificandWithExponent(const integerPart *decSigParts, + unsigned sigPartCount, int exp, + roundingMode rounding_mode) +{ + unsigned int parts, pow5PartCount; + fltSemantics calcSemantics = { 32767, -32767, 0, true }; + integerPart pow5Parts[maxPowerOfFiveParts]; + bool isNearest; + + isNearest = (rounding_mode == rmNearestTiesToEven + || rounding_mode == rmNearestTiesToAway); + + parts = partCountForBits(semantics->precision + 11); + + /* Calculate pow(5, abs(exp)). */ + pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp); + + for (;; parts *= 2) { + opStatus sigStatus, powStatus; + unsigned int excessPrecision, truncatedBits; + + calcSemantics.precision = parts * integerPartWidth - 1; + excessPrecision = calcSemantics.precision - semantics->precision; + truncatedBits = excessPrecision; + + APFloat decSig(calcSemantics, fcZero, sign); + APFloat pow5(calcSemantics, fcZero, false); + + sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, + rmNearestTiesToEven); + powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount, + rmNearestTiesToEven); + /* Add exp, as 10^n = 5^n * 2^n. */ + decSig.exponent += exp; + + lostFraction calcLostFraction; + integerPart HUerr, HUdistance, powHUerr; + + if (exp >= 0) { + /* multiplySignificand leaves the precision-th bit set to 1. */ + calcLostFraction = decSig.multiplySignificand(pow5, NULL); + powHUerr = powStatus != opOK; + } else { + calcLostFraction = decSig.divideSignificand(pow5); + /* Denormal numbers have less precision. */ + if (decSig.exponent < semantics->minExponent) { + excessPrecision += (semantics->minExponent - decSig.exponent); + truncatedBits = excessPrecision; + if (excessPrecision > calcSemantics.precision) + excessPrecision = calcSemantics.precision; + } + /* Extra half-ulp lost in reciprocal of exponent. */ + powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0: 2; + } + + /* Both multiplySignificand and divideSignificand return the + result with the integer bit set. */ + assert (APInt::tcExtractBit + (decSig.significandParts(), calcSemantics.precision - 1) == 1); + + HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, + powHUerr); + HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), + excessPrecision, isNearest); + + /* Are we guaranteed to round correctly if we truncate? */ + if (HUdistance >= HUerr) { + APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), + calcSemantics.precision - excessPrecision, + excessPrecision); + /* Take the exponent of decSig. If we tcExtract-ed less bits + above we must adjust our exponent to compensate for the + implicit right shift. */ + exponent = (decSig.exponent + semantics->precision + - (calcSemantics.precision - excessPrecision)); + calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), + decSig.partCount(), + truncatedBits); + return normalize(rounding_mode, calcLostFraction); + } + } +} + +APFloat::opStatus +APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) +{ + decimalInfo D; + opStatus fs; + + /* Scan the text. */ + interpretDecimal(p, &D); + + /* Handle the quick cases. First the case of no significant digits, + i.e. zero, and then exponents that are obviously too large or too + small. Writing L for log 10 / log 2, a number d.ddddd*10^exp + definitely overflows if + + (exp - 1) * L >= maxExponent + + and definitely underflows to zero where + + (exp + 1) * L <= minExponent - precision + + With integer arithmetic the tightest bounds for L are + + 93/28 < L < 196/59 [ numerator <= 256 ] + 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] + */ + + if (decDigitValue(*D.firstSigDigit) >= 10U) { + category = fcZero; + fs = opOK; + } else if ((D.normalizedExponent + 1) * 28738 + <= 8651 * (semantics->minExponent - (int) semantics->precision)) { + /* Underflow to zero and round. */ + zeroSignificand(); + fs = normalize(rounding_mode, lfLessThanHalf); + } else if ((D.normalizedExponent - 1) * 42039 + >= 12655 * semantics->maxExponent) { + /* Overflow and round. */ + fs = handleOverflow(rounding_mode); + } else { + integerPart *decSignificand; + unsigned int partCount; + + /* A tight upper bound on number of bits required to hold an + N-digit decimal integer is N * 196 / 59. Allocate enough space + to hold the full significand, and an extra part required by + tcMultiplyPart. */ + partCount = (D.lastSigDigit - D.firstSigDigit) + 1; + partCount = partCountForBits(1 + 196 * partCount / 59); + decSignificand = new integerPart[partCount + 1]; + partCount = 0; + + /* Convert to binary efficiently - we do almost all multiplication + in an integerPart. When this would overflow do we do a single + bignum multiplication, and then revert again to multiplication + in an integerPart. */ + do { + integerPart decValue, val, multiplier; + + val = 0; + multiplier = 1; + + do { + if (*p == '.') + p++; + + decValue = decDigitValue(*p++); + multiplier *= 10; + val = val * 10 + decValue; + /* The maximum number that can be multiplied by ten with any + digit added without overflowing an integerPart. */ + } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10); + + /* Multiply out the current part. */ + APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val, + partCount, partCount + 1, false); + + /* If we used another part (likely but not guaranteed), increase + the count. */ + if (decSignificand[partCount]) + partCount++; + } while (p <= D.lastSigDigit); + + category = fcNormal; + fs = roundSignificandWithExponent(decSignificand, partCount, + D.exponent, rounding_mode); + + delete [] decSignificand; + } + + return fs; +} + +APFloat::opStatus +APFloat::convertFromString(const char *p, roundingMode rounding_mode) +{ + assertArithmeticOK(*semantics); + /* Handle a leading minus sign. */ if(*p == '-') sign = 1, p++; @@ -1516,15 +2231,193 @@ APFloat::convertFromString(const char *p, roundingMode rounding_mode) { if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) return convertFromHexadecimalString(p + 2, rounding_mode); + else + return convertFromDecimalString(p, rounding_mode); +} - assert(0 && "Decimal to binary conversions not yet implemented"); - abort(); +/* Write out a hexadecimal representation of the floating point value + to DST, which must be of sufficient size, in the C99 form + [-]0xh.hhhhp[+-]d. Return the number of characters written, + excluding the terminating NUL. + + If UPPERCASE, the output is in upper case, otherwise in lower case. + + HEXDIGITS digits appear altogether, rounding the value if + necessary. If HEXDIGITS is 0, the minimal precision to display the + number precisely is used instead. If nothing would appear after + the decimal point it is suppressed. + + The decimal exponent is always printed and has at least one digit. + Zero values display an exponent of zero. Infinities and NaNs + appear as "infinity" or "nan" respectively. + + The above rules are as specified by C99. There is ambiguity about + what the leading hexadecimal digit should be. This implementation + uses whatever is necessary so that the exponent is displayed as + stored. This implies the exponent will fall within the IEEE format + range, and the leading hexadecimal digit will be 0 (for denormals), + 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with + any other digits zero). +*/ +unsigned int +APFloat::convertToHexString(char *dst, unsigned int hexDigits, + bool upperCase, roundingMode rounding_mode) const +{ + char *p; + + assertArithmeticOK(*semantics); + + p = dst; + if (sign) + *dst++ = '-'; + + switch (category) { + case fcInfinity: + memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1); + dst += sizeof infinityL - 1; + break; + + case fcNaN: + memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1); + dst += sizeof NaNU - 1; + break; + + case fcZero: + *dst++ = '0'; + *dst++ = upperCase ? 'X': 'x'; + *dst++ = '0'; + if (hexDigits > 1) { + *dst++ = '.'; + memset (dst, '0', hexDigits - 1); + dst += hexDigits - 1; + } + *dst++ = upperCase ? 'P': 'p'; + *dst++ = '0'; + break; + + case fcNormal: + dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode); + break; + } + + *dst = 0; + + return dst - p; +} + +/* Does the hard work of outputting the correctly rounded hexadecimal + form of a normal floating point number with the specified number of + hexadecimal digits. If HEXDIGITS is zero the minimum number of + digits necessary to print the value precisely is output. */ +char * +APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, + bool upperCase, + roundingMode rounding_mode) const +{ + unsigned int count, valueBits, shift, partsCount, outputDigits; + const char *hexDigitChars; + const integerPart *significand; + char *p; + bool roundUp; + + *dst++ = '0'; + *dst++ = upperCase ? 'X': 'x'; + + roundUp = false; + hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower; + + significand = significandParts(); + partsCount = partCount(); + + /* +3 because the first digit only uses the single integer bit, so + we have 3 virtual zero most-significant-bits. */ + valueBits = semantics->precision + 3; + shift = integerPartWidth - valueBits % integerPartWidth; + + /* The natural number of digits required ignoring trailing + insignificant zeroes. */ + outputDigits = (valueBits - significandLSB () + 3) / 4; + + /* hexDigits of zero means use the required number for the + precision. Otherwise, see if we are truncating. If we are, + find out if we need to round away from zero. */ + if (hexDigits) { + if (hexDigits < outputDigits) { + /* We are dropping non-zero bits, so need to check how to round. + "bits" is the number of dropped bits. */ + unsigned int bits; + lostFraction fraction; + + bits = valueBits - hexDigits * 4; + fraction = lostFractionThroughTruncation (significand, partsCount, bits); + roundUp = roundAwayFromZero(rounding_mode, fraction, bits); + } + outputDigits = hexDigits; + } + + /* Write the digits consecutively, and start writing in the location + of the hexadecimal point. We move the most significant digit + left and add the hexadecimal point later. */ + p = ++dst; + + count = (valueBits + integerPartWidth - 1) / integerPartWidth; + + while (outputDigits && count) { + integerPart part; + + /* Put the most significant integerPartWidth bits in "part". */ + if (--count == partsCount) + part = 0; /* An imaginary higher zero part. */ + else + part = significand[count] << shift; + + if (count && shift) + part |= significand[count - 1] >> (integerPartWidth - shift); + + /* Convert as much of "part" to hexdigits as we can. */ + unsigned int curDigits = integerPartWidth / 4; + + if (curDigits > outputDigits) + curDigits = outputDigits; + dst += partAsHex (dst, part, curDigits, hexDigitChars); + outputDigits -= curDigits; + } + + if (roundUp) { + char *q = dst; + + /* Note that hexDigitChars has a trailing '0'. */ + do { + q--; + *q = hexDigitChars[hexDigitValue (*q) + 1]; + } while (*q == '0'); + assert (q >= p); + } else { + /* Add trailing zeroes. */ + memset (dst, '0', outputDigits); + dst += outputDigits; + } + + /* Move the most significant digit to before the point, and if there + is something after the decimal point add it. This must come + after rounding above. */ + p[-1] = p[0]; + if (dst -1 == p) + dst--; + else + p[0] = '.'; + + /* Finally output the exponent. */ + *dst++ = upperCase ? 'P': 'p'; + + return writeSignedDecimal (dst, exponent); } // For good performance it is desirable for different APFloats // to produce different integers. uint32_t -APFloat::getHashValue() const { +APFloat::getHashValue() const +{ if (category==fcZero) return sign<<8 | semantics->precision ; else if (category==fcInfinity) return sign<<9 | semantics->precision; else if (category==fcNaN) return 1<<10 | semantics->precision; @@ -1540,39 +2433,124 @@ APFloat::getHashValue() const { // Conversion from APFloat to/from host float/double. It may eventually be // possible to eliminate these and have everybody deal with APFloats, but that // will take a while. This approach will not easily extend to long double. -// Current implementation requires partCount()==1, which is correct at the -// moment but could be made more general. +// Current implementation requires integerPartWidth==64, which is correct at +// the moment but could be made more general. -double -APFloat::convertToDouble() const { - assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble); +// Denormals have exponent minExponent in APFloat, but minExponent-1 in +// the actual IEEE respresentations. We compensate for that here. + +APInt +APFloat::convertF80LongDoubleAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended); + assert (partCount()==2); + + uint64_t myexponent, mysignificand; + + if (category==fcNormal) { + myexponent = exponent+16383; //bias + mysignificand = significandParts()[0]; + if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0x7fff; + mysignificand = 0x8000000000000000ULL; + } else { + assert(category == fcNaN && "Unknown category"); + myexponent = 0x7fff; + mysignificand = significandParts()[0]; + } + + uint64_t words[2]; + words[0] = (((uint64_t)sign & 1) << 63) | + ((myexponent & 0x7fff) << 48) | + ((mysignificand >>16) & 0xffffffffffffLL); + words[1] = mysignificand & 0xffff; + return APInt(80, 2, words); +} + +APInt +APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); + assert (partCount()==2); + + uint64_t myexponent, mysignificand, myexponent2, mysignificand2; + + if (category==fcNormal) { + myexponent = exponent + 1023; //bias + myexponent2 = exponent2 + 1023; + mysignificand = significandParts()[0]; + mysignificand2 = significandParts()[1]; + if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) + myexponent = 0; // denormal + if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL)) + myexponent2 = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + myexponent2 = 0; + mysignificand2 = 0; + } else if (category==fcInfinity) { + myexponent = 0x7ff; + myexponent2 = 0; + mysignificand = 0; + mysignificand2 = 0; + } else { + assert(category == fcNaN && "Unknown category"); + myexponent = 0x7ff; + mysignificand = significandParts()[0]; + myexponent2 = exponent2; + mysignificand2 = significandParts()[1]; + } + + uint64_t words[2]; + words[0] = (((uint64_t)sign & 1) << 63) | + ((myexponent & 0x7ff) << 52) | + (mysignificand & 0xfffffffffffffLL); + words[1] = (((uint64_t)sign2 & 1) << 63) | + ((myexponent2 & 0x7ff) << 52) | + (mysignificand2 & 0xfffffffffffffLL); + return APInt(128, 2, words); +} + +APInt +APFloat::convertDoubleAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); assert (partCount()==1); uint64_t myexponent, mysignificand; if (category==fcNormal) { - mysignificand = *significandParts(); myexponent = exponent+1023; //bias + mysignificand = *significandParts(); + if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) + myexponent = 0; // denormal } else if (category==fcZero) { myexponent = 0; mysignificand = 0; } else if (category==fcInfinity) { myexponent = 0x7ff; mysignificand = 0; - } else if (category==fcNaN) { + } else { + assert(category == fcNaN && "Unknown category!"); myexponent = 0x7ff; mysignificand = *significandParts(); - } else - assert(0); + } - return BitsToDouble((((uint64_t)sign & 1) << 63) | - ((myexponent & 0x7ff) << 52) | - (mysignificand & 0xfffffffffffffLL)); + return APInt(64, (((((uint64_t)sign & 1) << 63) | + ((myexponent & 0x7ff) << 52) | + (mysignificand & 0xfffffffffffffLL)))); } -float -APFloat::convertToFloat() const { - assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle); +APInt +APFloat::convertFloatAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); assert (partCount()==1); uint32_t myexponent, mysignificand; @@ -1580,24 +2558,156 @@ APFloat::convertToFloat() const { if (category==fcNormal) { myexponent = exponent+127; //bias mysignificand = *significandParts(); + if (myexponent == 1 && !(mysignificand & 0x800000)) + myexponent = 0; // denormal } else if (category==fcZero) { myexponent = 0; mysignificand = 0; } else if (category==fcInfinity) { myexponent = 0xff; mysignificand = 0; - } else if (category==fcNaN) { - myexponent = 0x7ff; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0xff; mysignificand = *significandParts(); - } else - assert(0); + } + + return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | + (mysignificand & 0x7fffff))); +} + +// This function creates an APInt that is just a bit map of the floating +// point constant as it would appear in memory. It is not a conversion, +// and treating the result as a normal integer is unlikely to be useful. + +APInt +APFloat::convertToAPInt() const +{ + if (semantics == (const llvm::fltSemantics*)&IEEEsingle) + return convertFloatAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics*)&IEEEdouble) + return convertDoubleAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) + return convertPPCDoubleDoubleAPFloatToAPInt(); + + assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended && + "unknown format!"); + return convertF80LongDoubleAPFloatToAPInt(); +} - return BitsToFloat(((sign&1) << 31) | ((myexponent&0xff) << 23) | - (mysignificand & 0x7fffff)); +float +APFloat::convertToFloat() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); + APInt api = convertToAPInt(); + return api.bitsToFloat(); } -APFloat::APFloat(double d) { - uint64_t i = DoubleToBits(d); +double +APFloat::convertToDouble() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); + APInt api = convertToAPInt(); + return api.bitsToDouble(); +} + +/// Integer bit is explicit in this format. Current Intel book does not +/// define meaning of: +/// exponent = all 1's, integer bit not set. +/// exponent = 0, integer bit set. (formerly "psuedodenormals") +/// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals") +void +APFloat::initFromF80LongDoubleAPInt(const APInt &api) +{ + assert(api.getBitWidth()==80); + uint64_t i1 = api.getRawData()[0]; + uint64_t i2 = api.getRawData()[1]; + uint64_t myexponent = (i1 >> 48) & 0x7fff; + uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) | + (i2 & 0xffff); + + initialize(&APFloat::x87DoubleExtended); + assert(partCount()==2); + + sign = i1>>63; + if (myexponent==0 && mysignificand==0) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) { + // exponent, significand meaningless + category = fcInfinity; + } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) { + // exponent meaningless + category = fcNaN; + significandParts()[0] = mysignificand; + significandParts()[1] = 0; + } else { + category = fcNormal; + exponent = myexponent - 16383; + significandParts()[0] = mysignificand; + significandParts()[1] = 0; + if (myexponent==0) // denormal + exponent = -16382; + } +} + +void +APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) +{ + assert(api.getBitWidth()==128); + uint64_t i1 = api.getRawData()[0]; + uint64_t i2 = api.getRawData()[1]; + uint64_t myexponent = (i1 >> 52) & 0x7ff; + uint64_t mysignificand = i1 & 0xfffffffffffffLL; + uint64_t myexponent2 = (i2 >> 52) & 0x7ff; + uint64_t mysignificand2 = i2 & 0xfffffffffffffLL; + + initialize(&APFloat::PPCDoubleDouble); + assert(partCount()==2); + + sign = i1>>63; + sign2 = i2>>63; + if (myexponent==0 && mysignificand==0) { + // exponent, significand meaningless + // exponent2 and significand2 are required to be 0; we don't check + category = fcZero; + } else if (myexponent==0x7ff && mysignificand==0) { + // exponent, significand meaningless + // exponent2 and significand2 are required to be 0; we don't check + category = fcInfinity; + } else if (myexponent==0x7ff && mysignificand!=0) { + // exponent meaningless. So is the whole second word, but keep it + // for determinism. + category = fcNaN; + exponent2 = myexponent2; + significandParts()[0] = mysignificand; + significandParts()[1] = mysignificand2; + } else { + category = fcNormal; + // Note there is no category2; the second word is treated as if it is + // fcNormal, although it might be something else considered by itself. + exponent = myexponent - 1023; + exponent2 = myexponent2 - 1023; + significandParts()[0] = mysignificand; + significandParts()[1] = mysignificand2; + if (myexponent==0) // denormal + exponent = -1022; + else + significandParts()[0] |= 0x10000000000000LL; // integer bit + if (myexponent2==0) + exponent2 = -1022; + else + significandParts()[1] |= 0x10000000000000LL; // integer bit + } +} + +void +APFloat::initFromDoubleAPInt(const APInt &api) +{ + assert(api.getBitWidth()==64); + uint64_t i = *api.getRawData(); uint64_t myexponent = (i >> 52) & 0x7ff; uint64_t mysignificand = i & 0xfffffffffffffLL; @@ -1618,12 +2728,19 @@ APFloat::APFloat(double d) { } else { category = fcNormal; exponent = myexponent - 1023; - *significandParts() = mysignificand | 0x10000000000000LL; - } + *significandParts() = mysignificand; + if (myexponent==0) // denormal + exponent = -1022; + else + *significandParts() |= 0x10000000000000LL; // integer bit + } } -APFloat::APFloat(float f) { - uint32_t i = FloatToBits(f); +void +APFloat::initFromFloatAPInt(const APInt & api) +{ + assert(api.getBitWidth()==32); + uint32_t i = (uint32_t)*api.getRawData(); uint32_t myexponent = (i >> 23) & 0xff; uint32_t mysignificand = i & 0x7fffff; @@ -1637,13 +2754,53 @@ APFloat::APFloat(float f) { } else if (myexponent==0xff && mysignificand==0) { // exponent, significand meaningless category = fcInfinity; - } else if (myexponent==0xff && (mysignificand & 0x400000)) { + } else if (myexponent==0xff && mysignificand!=0) { // sign, exponent, significand meaningless category = fcNaN; *significandParts() = mysignificand; } else { category = fcNormal; exponent = myexponent - 127; //bias - *significandParts() = mysignificand | 0x800000; // integer bit + *significandParts() = mysignificand; + if (myexponent==0) // denormal + exponent = -126; + else + *significandParts() |= 0x800000; // integer bit } } + +/// Treat api as containing the bits of a floating point number. Currently +/// we infer the floating point type from the size of the APInt. The +/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful +/// when the size is anything else). +void +APFloat::initFromAPInt(const APInt& api, bool isIEEE) +{ + if (api.getBitWidth() == 32) + return initFromFloatAPInt(api); + else if (api.getBitWidth()==64) + return initFromDoubleAPInt(api); + else if (api.getBitWidth()==80) + return initFromF80LongDoubleAPInt(api); + else if (api.getBitWidth()==128 && !isIEEE) + return initFromPPCDoubleDoubleAPInt(api); + else + assert(0); +} + +APFloat::APFloat(const APInt& api, bool isIEEE) +{ + initFromAPInt(api, isIEEE); +} + +APFloat::APFloat(float f) +{ + APInt api = APInt(32, 0); + initFromAPInt(api.floatToBits(f)); +} + +APFloat::APFloat(double d) +{ + APInt api = APInt(64, 0); + initFromAPInt(api.doubleToBits(d)); +}