1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Neil Booth and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
17 #include "llvm/ADT/APFloat.h"
18 #include "llvm/Support/MathExtras.h"
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
30 /* Represents floating point arithmetic semantics. */
32 /* The largest E such that 2^E is representable; this matches the
33 definition of IEEE 754. */
34 exponent_t maxExponent;
36 /* The smallest E such that 2^E is a normalized number; this
37 matches the definition of IEEE 754. */
38 exponent_t minExponent;
40 /* Number of bits in the significand. This includes the integer
42 unsigned int precision;
44 /* True if arithmetic is supported. */
45 unsigned int arithmeticOK;
48 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
49 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
50 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
51 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
52 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
54 // The PowerPC format consists of two doubles. It does not map cleanly
55 // onto the usual format above. For now only storage of constants of
56 // this type is supported, no arithmetic.
57 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
59 /* A tight upper bound on number of parts required to hold the value
62 power * 1024 / (441 * integerPartWidth) + 1
64 However, whilst the result may require only this many parts,
65 because we are multiplying two values to get it, the
66 multiplication may require an extra part with the excess part
67 being zero (consider the trivial case of 1 * 1, tcFullMultiply
68 requires two parts to hold the single-part result). So we add an
69 extra one to guarantee enough space whilst multiplying. */
70 const unsigned int maxExponent = 16383;
71 const unsigned int maxPrecision = 113;
72 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
73 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 1024)
74 / (441 * integerPartWidth));
77 /* Put a bunch of private, handy routines in an anonymous namespace. */
81 partCountForBits(unsigned int bits)
83 return ((bits) + integerPartWidth - 1) / integerPartWidth;
86 /* Returns 0U-9U. Return values >= 10U are not digits. */
88 decDigitValue(unsigned int c)
94 hexDigitValue(unsigned int c)
114 assertArithmeticOK(const llvm::fltSemantics &semantics) {
115 assert(semantics.arithmeticOK
116 && "Compile-time arithmetic does not support these semantics");
119 /* Return the value of a decimal exponent of the form
122 If the exponent overflows, returns a large exponent with the
125 readExponent(const char *p)
128 unsigned int absExponent;
129 const unsigned int overlargeExponent = 24000; /* FIXME. */
131 isNegative = (*p == '-');
132 if (*p == '-' || *p == '+')
135 absExponent = decDigitValue(*p++);
136 assert (absExponent < 10U);
141 value = decDigitValue(*p);
146 value += absExponent * 10;
147 if (absExponent >= overlargeExponent) {
148 absExponent = overlargeExponent;
155 return -(int) absExponent;
157 return (int) absExponent;
160 /* This is ugly and needs cleaning up, but I don't immediately see
161 how whilst remaining safe. */
163 totalExponent(const char *p, int exponentAdjustment)
165 integerPart unsignedExponent;
166 bool negative, overflow;
169 /* Move past the exponent letter and sign to the digits. */
171 negative = *p == '-';
172 if(*p == '-' || *p == '+')
175 unsignedExponent = 0;
180 value = decDigitValue(*p);
185 unsignedExponent = unsignedExponent * 10 + value;
186 if(unsignedExponent > 65535)
190 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
194 exponent = unsignedExponent;
196 exponent = -exponent;
197 exponent += exponentAdjustment;
198 if(exponent > 65535 || exponent < -65536)
203 exponent = negative ? -65536: 65535;
209 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
224 /* Given a normal decimal floating point number of the form
228 where the decimal point and exponent are optional, fill out the
229 structure D. If the value is zero, V->firstSigDigit
230 points to a zero, and the return exponent is zero. */
232 const char *firstSigDigit;
233 const char *lastSigDigit;
238 interpretDecimal(const char *p, decimalInfo *D)
242 p = skipLeadingZeroesAndAnyDot (p, &dot);
244 D->firstSigDigit = p;
252 if (decDigitValue(*p) >= 10U)
257 /* If number is all zerooes accept any exponent. */
258 if (p != D->firstSigDigit) {
259 if (*p == 'e' || *p == 'E')
260 D->exponent = readExponent(p + 1);
262 /* Implied decimal point? */
266 /* Drop insignificant trailing zeroes. */
273 /* Adjust the specified exponent for any decimal point. */
274 D->exponent += (dot - p) - (dot > p);
280 /* Return the trailing fraction of a hexadecimal number.
281 DIGITVALUE is the first hex digit of the fraction, P points to
284 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
286 unsigned int hexDigit;
288 /* If the first trailing digit isn't 0 or 8 we can work out the
289 fraction immediately. */
291 return lfMoreThanHalf;
292 else if(digitValue < 8 && digitValue > 0)
293 return lfLessThanHalf;
295 /* Otherwise we need to find the first non-zero digit. */
299 hexDigit = hexDigitValue(*p);
301 /* If we ran off the end it is exactly zero or one-half, otherwise
304 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
306 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
309 /* Return the fraction lost were a bignum truncated losing the least
310 significant BITS bits. */
312 lostFractionThroughTruncation(const integerPart *parts,
313 unsigned int partCount,
318 lsb = APInt::tcLSB(parts, partCount);
320 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
322 return lfExactlyZero;
324 return lfExactlyHalf;
325 if(bits <= partCount * integerPartWidth
326 && APInt::tcExtractBit(parts, bits - 1))
327 return lfMoreThanHalf;
329 return lfLessThanHalf;
332 /* Shift DST right BITS bits noting lost fraction. */
334 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
336 lostFraction lost_fraction;
338 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
340 APInt::tcShiftRight(dst, parts, bits);
342 return lost_fraction;
345 /* Combine the effect of two lost fractions. */
347 combineLostFractions(lostFraction moreSignificant,
348 lostFraction lessSignificant)
350 if(lessSignificant != lfExactlyZero) {
351 if(moreSignificant == lfExactlyZero)
352 moreSignificant = lfLessThanHalf;
353 else if(moreSignificant == lfExactlyHalf)
354 moreSignificant = lfMoreThanHalf;
357 return moreSignificant;
360 /* The error from the true value, in half-ulps, on multiplying two
361 floating point numbers, which differ from the value they
362 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
363 than the returned value.
365 See "How to Read Floating Point Numbers Accurately" by William D
368 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
370 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
372 if (HUerr1 + HUerr2 == 0)
373 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
375 return inexactMultiply + 2 * (HUerr1 + HUerr2);
378 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
379 when the least significant BITS are truncated. BITS cannot be
382 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
384 unsigned int count, partBits;
385 integerPart part, boundary;
390 count = bits / integerPartWidth;
391 partBits = bits % integerPartWidth + 1;
393 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
396 boundary = (integerPart) 1 << (partBits - 1);
401 if (part - boundary <= boundary - part)
402 return part - boundary;
404 return boundary - part;
407 if (part == boundary) {
410 return ~(integerPart) 0; /* A lot. */
413 } else if (part == boundary - 1) {
416 return ~(integerPart) 0; /* A lot. */
421 return ~(integerPart) 0; /* A lot. */
424 /* Place pow(5, power) in DST, and return the number of parts used.
425 DST must be at least one part larger than size of the answer. */
427 powerOf5(integerPart *dst, unsigned int power)
429 static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
431 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
432 static unsigned int partsCount[16] = { 1 };
434 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
437 assert(power <= maxExponent);
442 *p1 = firstEightPowers[power & 7];
448 for (unsigned int n = 0; power; power >>= 1, n++) {
453 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
455 pc = partsCount[n - 1];
456 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
458 if (pow5[pc - 1] == 0)
466 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
468 if (p2[result - 1] == 0)
471 /* Now result is in p1 with partsCount parts and p2 is scratch
473 tmp = p1, p1 = p2, p2 = tmp;
480 APInt::tcAssign(dst, p1, result);
485 /* Zero at the end to avoid modular arithmetic when adding one; used
486 when rounding up during hexadecimal output. */
487 static const char hexDigitsLower[] = "0123456789abcdef0";
488 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
489 static const char infinityL[] = "infinity";
490 static const char infinityU[] = "INFINITY";
491 static const char NaNL[] = "nan";
492 static const char NaNU[] = "NAN";
494 /* Write out an integerPart in hexadecimal, starting with the most
495 significant nibble. Write out exactly COUNT hexdigits, return
498 partAsHex (char *dst, integerPart part, unsigned int count,
499 const char *hexDigitChars)
501 unsigned int result = count;
503 assert (count != 0 && count <= integerPartWidth / 4);
505 part >>= (integerPartWidth - 4 * count);
507 dst[count] = hexDigitChars[part & 0xf];
514 /* Write out an unsigned decimal integer. */
516 writeUnsignedDecimal (char *dst, unsigned int n)
532 /* Write out a signed decimal integer. */
534 writeSignedDecimal (char *dst, int value)
538 dst = writeUnsignedDecimal(dst, -(unsigned) value);
540 dst = writeUnsignedDecimal(dst, value);
548 APFloat::initialize(const fltSemantics *ourSemantics)
552 semantics = ourSemantics;
555 significand.parts = new integerPart[count];
559 APFloat::freeSignificand()
562 delete [] significand.parts;
566 APFloat::assign(const APFloat &rhs)
568 assert(semantics == rhs.semantics);
571 category = rhs.category;
572 exponent = rhs.exponent;
574 exponent2 = rhs.exponent2;
575 if(category == fcNormal || category == fcNaN)
576 copySignificand(rhs);
580 APFloat::copySignificand(const APFloat &rhs)
582 assert(category == fcNormal || category == fcNaN);
583 assert(rhs.partCount() >= partCount());
585 APInt::tcAssign(significandParts(), rhs.significandParts(),
589 /* Make this number a NaN, with an arbitrary but deterministic value
590 for the significand. */
592 APFloat::makeNaN(void)
595 APInt::tcSet(significandParts(), ~0U, partCount());
599 APFloat::operator=(const APFloat &rhs)
602 if(semantics != rhs.semantics) {
604 initialize(rhs.semantics);
613 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
616 if (semantics != rhs.semantics ||
617 category != rhs.category ||
620 if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble &&
623 if (category==fcZero || category==fcInfinity)
625 else if (category==fcNormal && exponent!=rhs.exponent)
627 else if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble &&
628 exponent2!=rhs.exponent2)
632 const integerPart* p=significandParts();
633 const integerPart* q=rhs.significandParts();
634 for (; i>0; i--, p++, q++) {
642 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
644 assertArithmeticOK(ourSemantics);
645 initialize(&ourSemantics);
648 exponent = ourSemantics.precision - 1;
649 significandParts()[0] = value;
650 normalize(rmNearestTiesToEven, lfExactlyZero);
653 APFloat::APFloat(const fltSemantics &ourSemantics,
654 fltCategory ourCategory, bool negative)
656 assertArithmeticOK(ourSemantics);
657 initialize(&ourSemantics);
658 category = ourCategory;
660 if(category == fcNormal)
662 else if (ourCategory == fcNaN)
666 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
668 assertArithmeticOK(ourSemantics);
669 initialize(&ourSemantics);
670 convertFromString(text, rmNearestTiesToEven);
673 APFloat::APFloat(const APFloat &rhs)
675 initialize(rhs.semantics);
685 APFloat::partCount() const
687 return partCountForBits(semantics->precision + 1);
691 APFloat::semanticsPrecision(const fltSemantics &semantics)
693 return semantics.precision;
697 APFloat::significandParts() const
699 return const_cast<APFloat *>(this)->significandParts();
703 APFloat::significandParts()
705 assert(category == fcNormal || category == fcNaN);
708 return significand.parts;
710 return &significand.part;
714 APFloat::zeroSignificand()
717 APInt::tcSet(significandParts(), 0, partCount());
720 /* Increment an fcNormal floating point number's significand. */
722 APFloat::incrementSignificand()
726 carry = APInt::tcIncrement(significandParts(), partCount());
728 /* Our callers should never cause us to overflow. */
732 /* Add the significand of the RHS. Returns the carry flag. */
734 APFloat::addSignificand(const APFloat &rhs)
738 parts = significandParts();
740 assert(semantics == rhs.semantics);
741 assert(exponent == rhs.exponent);
743 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
746 /* Subtract the significand of the RHS with a borrow flag. Returns
749 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
753 parts = significandParts();
755 assert(semantics == rhs.semantics);
756 assert(exponent == rhs.exponent);
758 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
762 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
763 on to the full-precision result of the multiplication. Returns the
766 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
768 unsigned int omsb; // One, not zero, based MSB.
769 unsigned int partsCount, newPartsCount, precision;
770 integerPart *lhsSignificand;
771 integerPart scratch[4];
772 integerPart *fullSignificand;
773 lostFraction lost_fraction;
775 assert(semantics == rhs.semantics);
777 precision = semantics->precision;
778 newPartsCount = partCountForBits(precision * 2);
780 if(newPartsCount > 4)
781 fullSignificand = new integerPart[newPartsCount];
783 fullSignificand = scratch;
785 lhsSignificand = significandParts();
786 partsCount = partCount();
788 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
789 rhs.significandParts(), partsCount, partsCount);
791 lost_fraction = lfExactlyZero;
792 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
793 exponent += rhs.exponent;
796 Significand savedSignificand = significand;
797 const fltSemantics *savedSemantics = semantics;
798 fltSemantics extendedSemantics;
800 unsigned int extendedPrecision;
802 /* Normalize our MSB. */
803 extendedPrecision = precision + precision - 1;
804 if(omsb != extendedPrecision)
806 APInt::tcShiftLeft(fullSignificand, newPartsCount,
807 extendedPrecision - omsb);
808 exponent -= extendedPrecision - omsb;
811 /* Create new semantics. */
812 extendedSemantics = *semantics;
813 extendedSemantics.precision = extendedPrecision;
815 if(newPartsCount == 1)
816 significand.part = fullSignificand[0];
818 significand.parts = fullSignificand;
819 semantics = &extendedSemantics;
821 APFloat extendedAddend(*addend);
822 status = extendedAddend.convert(extendedSemantics, rmTowardZero);
823 assert(status == opOK);
824 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
826 /* Restore our state. */
827 if(newPartsCount == 1)
828 fullSignificand[0] = significand.part;
829 significand = savedSignificand;
830 semantics = savedSemantics;
832 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
835 exponent -= (precision - 1);
837 if(omsb > precision) {
838 unsigned int bits, significantParts;
841 bits = omsb - precision;
842 significantParts = partCountForBits(omsb);
843 lf = shiftRight(fullSignificand, significantParts, bits);
844 lost_fraction = combineLostFractions(lf, lost_fraction);
848 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
850 if(newPartsCount > 4)
851 delete [] fullSignificand;
853 return lost_fraction;
856 /* Multiply the significands of LHS and RHS to DST. */
858 APFloat::divideSignificand(const APFloat &rhs)
860 unsigned int bit, i, partsCount;
861 const integerPart *rhsSignificand;
862 integerPart *lhsSignificand, *dividend, *divisor;
863 integerPart scratch[4];
864 lostFraction lost_fraction;
866 assert(semantics == rhs.semantics);
868 lhsSignificand = significandParts();
869 rhsSignificand = rhs.significandParts();
870 partsCount = partCount();
873 dividend = new integerPart[partsCount * 2];
877 divisor = dividend + partsCount;
879 /* Copy the dividend and divisor as they will be modified in-place. */
880 for(i = 0; i < partsCount; i++) {
881 dividend[i] = lhsSignificand[i];
882 divisor[i] = rhsSignificand[i];
883 lhsSignificand[i] = 0;
886 exponent -= rhs.exponent;
888 unsigned int precision = semantics->precision;
890 /* Normalize the divisor. */
891 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
894 APInt::tcShiftLeft(divisor, partsCount, bit);
897 /* Normalize the dividend. */
898 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
901 APInt::tcShiftLeft(dividend, partsCount, bit);
904 /* Ensure the dividend >= divisor initially for the loop below.
905 Incidentally, this means that the division loop below is
906 guaranteed to set the integer bit to one. */
907 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
909 APInt::tcShiftLeft(dividend, partsCount, 1);
910 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
914 for(bit = precision; bit; bit -= 1) {
915 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
916 APInt::tcSubtract(dividend, divisor, 0, partsCount);
917 APInt::tcSetBit(lhsSignificand, bit - 1);
920 APInt::tcShiftLeft(dividend, partsCount, 1);
923 /* Figure out the lost fraction. */
924 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
927 lost_fraction = lfMoreThanHalf;
929 lost_fraction = lfExactlyHalf;
930 else if(APInt::tcIsZero(dividend, partsCount))
931 lost_fraction = lfExactlyZero;
933 lost_fraction = lfLessThanHalf;
938 return lost_fraction;
942 APFloat::significandMSB() const
944 return APInt::tcMSB(significandParts(), partCount());
948 APFloat::significandLSB() const
950 return APInt::tcLSB(significandParts(), partCount());
953 /* Note that a zero result is NOT normalized to fcZero. */
955 APFloat::shiftSignificandRight(unsigned int bits)
957 /* Our exponent should not overflow. */
958 assert((exponent_t) (exponent + bits) >= exponent);
962 return shiftRight(significandParts(), partCount(), bits);
965 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
967 APFloat::shiftSignificandLeft(unsigned int bits)
969 assert(bits < semantics->precision);
972 unsigned int partsCount = partCount();
974 APInt::tcShiftLeft(significandParts(), partsCount, bits);
977 assert(!APInt::tcIsZero(significandParts(), partsCount));
982 APFloat::compareAbsoluteValue(const APFloat &rhs) const
986 assert(semantics == rhs.semantics);
987 assert(category == fcNormal);
988 assert(rhs.category == fcNormal);
990 compare = exponent - rhs.exponent;
992 /* If exponents are equal, do an unsigned bignum comparison of the
995 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
999 return cmpGreaterThan;
1000 else if(compare < 0)
1006 /* Handle overflow. Sign is preserved. We either become infinity or
1007 the largest finite number. */
1009 APFloat::handleOverflow(roundingMode rounding_mode)
1012 if(rounding_mode == rmNearestTiesToEven
1013 || rounding_mode == rmNearestTiesToAway
1014 || (rounding_mode == rmTowardPositive && !sign)
1015 || (rounding_mode == rmTowardNegative && sign))
1017 category = fcInfinity;
1018 return (opStatus) (opOverflow | opInexact);
1021 /* Otherwise we become the largest finite number. */
1022 category = fcNormal;
1023 exponent = semantics->maxExponent;
1024 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1025 semantics->precision);
1030 /* Returns TRUE if, when truncating the current number, with BIT the
1031 new LSB, with the given lost fraction and rounding mode, the result
1032 would need to be rounded away from zero (i.e., by increasing the
1033 signficand). This routine must work for fcZero of both signs, and
1034 fcNormal numbers. */
1036 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1037 lostFraction lost_fraction,
1038 unsigned int bit) const
1040 /* NaNs and infinities should not have lost fractions. */
1041 assert(category == fcNormal || category == fcZero);
1043 /* Current callers never pass this so we don't handle it. */
1044 assert(lost_fraction != lfExactlyZero);
1046 switch(rounding_mode) {
1050 case rmNearestTiesToAway:
1051 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1053 case rmNearestTiesToEven:
1054 if(lost_fraction == lfMoreThanHalf)
1057 /* Our zeroes don't have a significand to test. */
1058 if(lost_fraction == lfExactlyHalf && category != fcZero)
1059 return APInt::tcExtractBit(significandParts(), bit);
1066 case rmTowardPositive:
1067 return sign == false;
1069 case rmTowardNegative:
1070 return sign == true;
1075 APFloat::normalize(roundingMode rounding_mode,
1076 lostFraction lost_fraction)
1078 unsigned int omsb; /* One, not zero, based MSB. */
1081 if(category != fcNormal)
1084 /* Before rounding normalize the exponent of fcNormal numbers. */
1085 omsb = significandMSB() + 1;
1088 /* OMSB is numbered from 1. We want to place it in the integer
1089 bit numbered PRECISON if possible, with a compensating change in
1091 exponentChange = omsb - semantics->precision;
1093 /* If the resulting exponent is too high, overflow according to
1094 the rounding mode. */
1095 if(exponent + exponentChange > semantics->maxExponent)
1096 return handleOverflow(rounding_mode);
1098 /* Subnormal numbers have exponent minExponent, and their MSB
1099 is forced based on that. */
1100 if(exponent + exponentChange < semantics->minExponent)
1101 exponentChange = semantics->minExponent - exponent;
1103 /* Shifting left is easy as we don't lose precision. */
1104 if(exponentChange < 0) {
1105 assert(lost_fraction == lfExactlyZero);
1107 shiftSignificandLeft(-exponentChange);
1112 if(exponentChange > 0) {
1115 /* Shift right and capture any new lost fraction. */
1116 lf = shiftSignificandRight(exponentChange);
1118 lost_fraction = combineLostFractions(lf, lost_fraction);
1120 /* Keep OMSB up-to-date. */
1121 if(omsb > (unsigned) exponentChange)
1122 omsb -= exponentChange;
1128 /* Now round the number according to rounding_mode given the lost
1131 /* As specified in IEEE 754, since we do not trap we do not report
1132 underflow for exact results. */
1133 if(lost_fraction == lfExactlyZero) {
1134 /* Canonicalize zeroes. */
1141 /* Increment the significand if we're rounding away from zero. */
1142 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1144 exponent = semantics->minExponent;
1146 incrementSignificand();
1147 omsb = significandMSB() + 1;
1149 /* Did the significand increment overflow? */
1150 if(omsb == (unsigned) semantics->precision + 1) {
1151 /* Renormalize by incrementing the exponent and shifting our
1152 significand right one. However if we already have the
1153 maximum exponent we overflow to infinity. */
1154 if(exponent == semantics->maxExponent) {
1155 category = fcInfinity;
1157 return (opStatus) (opOverflow | opInexact);
1160 shiftSignificandRight(1);
1166 /* The normal case - we were and are not denormal, and any
1167 significand increment above didn't overflow. */
1168 if(omsb == semantics->precision)
1171 /* We have a non-zero denormal. */
1172 assert(omsb < semantics->precision);
1174 /* Canonicalize zeroes. */
1178 /* The fcZero case is a denormal that underflowed to zero. */
1179 return (opStatus) (opUnderflow | opInexact);
1183 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1185 switch(convolve(category, rhs.category)) {
1189 case convolve(fcNaN, fcZero):
1190 case convolve(fcNaN, fcNormal):
1191 case convolve(fcNaN, fcInfinity):
1192 case convolve(fcNaN, fcNaN):
1193 case convolve(fcNormal, fcZero):
1194 case convolve(fcInfinity, fcNormal):
1195 case convolve(fcInfinity, fcZero):
1198 case convolve(fcZero, fcNaN):
1199 case convolve(fcNormal, fcNaN):
1200 case convolve(fcInfinity, fcNaN):
1202 copySignificand(rhs);
1205 case convolve(fcNormal, fcInfinity):
1206 case convolve(fcZero, fcInfinity):
1207 category = fcInfinity;
1208 sign = rhs.sign ^ subtract;
1211 case convolve(fcZero, fcNormal):
1213 sign = rhs.sign ^ subtract;
1216 case convolve(fcZero, fcZero):
1217 /* Sign depends on rounding mode; handled by caller. */
1220 case convolve(fcInfinity, fcInfinity):
1221 /* Differently signed infinities can only be validly
1223 if(sign ^ rhs.sign != subtract) {
1230 case convolve(fcNormal, fcNormal):
1235 /* Add or subtract two normal numbers. */
1237 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1240 lostFraction lost_fraction;
1243 /* Determine if the operation on the absolute values is effectively
1244 an addition or subtraction. */
1245 subtract ^= (sign ^ rhs.sign);
1247 /* Are we bigger exponent-wise than the RHS? */
1248 bits = exponent - rhs.exponent;
1250 /* Subtraction is more subtle than one might naively expect. */
1252 APFloat temp_rhs(rhs);
1256 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1257 lost_fraction = lfExactlyZero;
1258 } else if (bits > 0) {
1259 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1260 shiftSignificandLeft(1);
1263 lost_fraction = shiftSignificandRight(-bits - 1);
1264 temp_rhs.shiftSignificandLeft(1);
1269 carry = temp_rhs.subtractSignificand
1270 (*this, lost_fraction != lfExactlyZero);
1271 copySignificand(temp_rhs);
1274 carry = subtractSignificand
1275 (temp_rhs, lost_fraction != lfExactlyZero);
1278 /* Invert the lost fraction - it was on the RHS and
1280 if(lost_fraction == lfLessThanHalf)
1281 lost_fraction = lfMoreThanHalf;
1282 else if(lost_fraction == lfMoreThanHalf)
1283 lost_fraction = lfLessThanHalf;
1285 /* The code above is intended to ensure that no borrow is
1290 APFloat temp_rhs(rhs);
1292 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1293 carry = addSignificand(temp_rhs);
1295 lost_fraction = shiftSignificandRight(-bits);
1296 carry = addSignificand(rhs);
1299 /* We have a guard bit; generating a carry cannot happen. */
1303 return lost_fraction;
1307 APFloat::multiplySpecials(const APFloat &rhs)
1309 switch(convolve(category, rhs.category)) {
1313 case convolve(fcNaN, fcZero):
1314 case convolve(fcNaN, fcNormal):
1315 case convolve(fcNaN, fcInfinity):
1316 case convolve(fcNaN, fcNaN):
1319 case convolve(fcZero, fcNaN):
1320 case convolve(fcNormal, fcNaN):
1321 case convolve(fcInfinity, fcNaN):
1323 copySignificand(rhs);
1326 case convolve(fcNormal, fcInfinity):
1327 case convolve(fcInfinity, fcNormal):
1328 case convolve(fcInfinity, fcInfinity):
1329 category = fcInfinity;
1332 case convolve(fcZero, fcNormal):
1333 case convolve(fcNormal, fcZero):
1334 case convolve(fcZero, fcZero):
1338 case convolve(fcZero, fcInfinity):
1339 case convolve(fcInfinity, fcZero):
1343 case convolve(fcNormal, fcNormal):
1349 APFloat::divideSpecials(const APFloat &rhs)
1351 switch(convolve(category, rhs.category)) {
1355 case convolve(fcNaN, fcZero):
1356 case convolve(fcNaN, fcNormal):
1357 case convolve(fcNaN, fcInfinity):
1358 case convolve(fcNaN, fcNaN):
1359 case convolve(fcInfinity, fcZero):
1360 case convolve(fcInfinity, fcNormal):
1361 case convolve(fcZero, fcInfinity):
1362 case convolve(fcZero, fcNormal):
1365 case convolve(fcZero, fcNaN):
1366 case convolve(fcNormal, fcNaN):
1367 case convolve(fcInfinity, fcNaN):
1369 copySignificand(rhs);
1372 case convolve(fcNormal, fcInfinity):
1376 case convolve(fcNormal, fcZero):
1377 category = fcInfinity;
1380 case convolve(fcInfinity, fcInfinity):
1381 case convolve(fcZero, fcZero):
1385 case convolve(fcNormal, fcNormal):
1392 APFloat::changeSign()
1394 /* Look mummy, this one's easy. */
1399 APFloat::clearSign()
1401 /* So is this one. */
1406 APFloat::copySign(const APFloat &rhs)
1412 /* Normalized addition or subtraction. */
1414 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1419 assertArithmeticOK(*semantics);
1421 fs = addOrSubtractSpecials(rhs, subtract);
1423 /* This return code means it was not a simple case. */
1424 if(fs == opDivByZero) {
1425 lostFraction lost_fraction;
1427 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1428 fs = normalize(rounding_mode, lost_fraction);
1430 /* Can only be zero if we lost no fraction. */
1431 assert(category != fcZero || lost_fraction == lfExactlyZero);
1434 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1435 positive zero unless rounding to minus infinity, except that
1436 adding two like-signed zeroes gives that zero. */
1437 if(category == fcZero) {
1438 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1439 sign = (rounding_mode == rmTowardNegative);
1445 /* Normalized addition. */
1447 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1449 return addOrSubtract(rhs, rounding_mode, false);
1452 /* Normalized subtraction. */
1454 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1456 return addOrSubtract(rhs, rounding_mode, true);
1459 /* Normalized multiply. */
1461 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1465 assertArithmeticOK(*semantics);
1467 fs = multiplySpecials(rhs);
1469 if(category == fcNormal) {
1470 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1471 fs = normalize(rounding_mode, lost_fraction);
1472 if(lost_fraction != lfExactlyZero)
1473 fs = (opStatus) (fs | opInexact);
1479 /* Normalized divide. */
1481 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1485 assertArithmeticOK(*semantics);
1487 fs = divideSpecials(rhs);
1489 if(category == fcNormal) {
1490 lostFraction lost_fraction = divideSignificand(rhs);
1491 fs = normalize(rounding_mode, lost_fraction);
1492 if(lost_fraction != lfExactlyZero)
1493 fs = (opStatus) (fs | opInexact);
1499 /* Normalized remainder. This is not currently doing TRT. */
1501 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1505 unsigned int origSign = sign;
1507 assertArithmeticOK(*semantics);
1508 fs = V.divide(rhs, rmNearestTiesToEven);
1509 if (fs == opDivByZero)
1512 int parts = partCount();
1513 integerPart *x = new integerPart[parts];
1514 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1515 rmNearestTiesToEven);
1516 if (fs==opInvalidOp)
1519 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1520 rmNearestTiesToEven);
1521 assert(fs==opOK); // should always work
1523 fs = V.multiply(rhs, rounding_mode);
1524 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1526 fs = subtract(V, rounding_mode);
1527 assert(fs==opOK || fs==opInexact); // likewise
1530 sign = origSign; // IEEE754 requires this
1535 /* Normalized fused-multiply-add. */
1537 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1538 const APFloat &addend,
1539 roundingMode rounding_mode)
1543 assertArithmeticOK(*semantics);
1545 /* Post-multiplication sign, before addition. */
1546 sign ^= multiplicand.sign;
1548 /* If and only if all arguments are normal do we need to do an
1549 extended-precision calculation. */
1550 if(category == fcNormal
1551 && multiplicand.category == fcNormal
1552 && addend.category == fcNormal) {
1553 lostFraction lost_fraction;
1555 lost_fraction = multiplySignificand(multiplicand, &addend);
1556 fs = normalize(rounding_mode, lost_fraction);
1557 if(lost_fraction != lfExactlyZero)
1558 fs = (opStatus) (fs | opInexact);
1560 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1561 positive zero unless rounding to minus infinity, except that
1562 adding two like-signed zeroes gives that zero. */
1563 if(category == fcZero && sign != addend.sign)
1564 sign = (rounding_mode == rmTowardNegative);
1566 fs = multiplySpecials(multiplicand);
1568 /* FS can only be opOK or opInvalidOp. There is no more work
1569 to do in the latter case. The IEEE-754R standard says it is
1570 implementation-defined in this case whether, if ADDEND is a
1571 quiet NaN, we raise invalid op; this implementation does so.
1573 If we need to do the addition we can do so with normal
1576 fs = addOrSubtract(addend, rounding_mode, false);
1582 /* Comparison requires normalized numbers. */
1584 APFloat::compare(const APFloat &rhs) const
1588 assertArithmeticOK(*semantics);
1589 assert(semantics == rhs.semantics);
1591 switch(convolve(category, rhs.category)) {
1595 case convolve(fcNaN, fcZero):
1596 case convolve(fcNaN, fcNormal):
1597 case convolve(fcNaN, fcInfinity):
1598 case convolve(fcNaN, fcNaN):
1599 case convolve(fcZero, fcNaN):
1600 case convolve(fcNormal, fcNaN):
1601 case convolve(fcInfinity, fcNaN):
1602 return cmpUnordered;
1604 case convolve(fcInfinity, fcNormal):
1605 case convolve(fcInfinity, fcZero):
1606 case convolve(fcNormal, fcZero):
1610 return cmpGreaterThan;
1612 case convolve(fcNormal, fcInfinity):
1613 case convolve(fcZero, fcInfinity):
1614 case convolve(fcZero, fcNormal):
1616 return cmpGreaterThan;
1620 case convolve(fcInfinity, fcInfinity):
1621 if(sign == rhs.sign)
1626 return cmpGreaterThan;
1628 case convolve(fcZero, fcZero):
1631 case convolve(fcNormal, fcNormal):
1635 /* Two normal numbers. Do they have the same sign? */
1636 if(sign != rhs.sign) {
1638 result = cmpLessThan;
1640 result = cmpGreaterThan;
1642 /* Compare absolute values; invert result if negative. */
1643 result = compareAbsoluteValue(rhs);
1646 if(result == cmpLessThan)
1647 result = cmpGreaterThan;
1648 else if(result == cmpGreaterThan)
1649 result = cmpLessThan;
1657 APFloat::convert(const fltSemantics &toSemantics,
1658 roundingMode rounding_mode)
1660 lostFraction lostFraction;
1661 unsigned int newPartCount, oldPartCount;
1664 assertArithmeticOK(*semantics);
1665 lostFraction = lfExactlyZero;
1666 newPartCount = partCountForBits(toSemantics.precision + 1);
1667 oldPartCount = partCount();
1669 /* Handle storage complications. If our new form is wider,
1670 re-allocate our bit pattern into wider storage. If it is
1671 narrower, we ignore the excess parts, but if narrowing to a
1672 single part we need to free the old storage.
1673 Be careful not to reference significandParts for zeroes
1674 and infinities, since it aborts. */
1675 if (newPartCount > oldPartCount) {
1676 integerPart *newParts;
1677 newParts = new integerPart[newPartCount];
1678 APInt::tcSet(newParts, 0, newPartCount);
1679 if (category==fcNormal || category==fcNaN)
1680 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1682 significand.parts = newParts;
1683 } else if (newPartCount < oldPartCount) {
1684 /* Capture any lost fraction through truncation of parts so we get
1685 correct rounding whilst normalizing. */
1686 if (category==fcNormal)
1687 lostFraction = lostFractionThroughTruncation
1688 (significandParts(), oldPartCount, toSemantics.precision);
1689 if (newPartCount == 1) {
1690 integerPart newPart = 0;
1691 if (category==fcNormal || category==fcNaN)
1692 newPart = significandParts()[0];
1694 significand.part = newPart;
1698 if(category == fcNormal) {
1699 /* Re-interpret our bit-pattern. */
1700 exponent += toSemantics.precision - semantics->precision;
1701 semantics = &toSemantics;
1702 fs = normalize(rounding_mode, lostFraction);
1703 } else if (category == fcNaN) {
1704 int shift = toSemantics.precision - semantics->precision;
1705 // No normalization here, just truncate
1707 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1709 APInt::tcShiftRight(significandParts(), newPartCount, -shift);
1710 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1711 // does not give you back the same bits. This is dubious, and we
1712 // don't currently do it. You're really supposed to get
1713 // an invalid operation signal at runtime, but nobody does that.
1714 semantics = &toSemantics;
1717 semantics = &toSemantics;
1724 /* Convert a floating point number to an integer according to the
1725 rounding mode. If the rounded integer value is out of range this
1726 returns an invalid operation exception. If the rounded value is in
1727 range but the floating point number is not the exact integer, the C
1728 standard doesn't require an inexact exception to be raised. IEEE
1729 854 does require it so we do that.
1731 Note that for conversions to integer type the C standard requires
1732 round-to-zero to always be used. */
1734 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1736 roundingMode rounding_mode) const
1738 lostFraction lost_fraction;
1739 unsigned int msb, partsCount;
1742 assertArithmeticOK(*semantics);
1743 partsCount = partCountForBits(width);
1745 /* Handle the three special cases first. We produce
1746 a deterministic result even for the Invalid cases. */
1747 if (category == fcNaN) {
1748 // Neither sign nor isSigned affects this.
1749 APInt::tcSet(parts, 0, partsCount);
1752 if (category == fcInfinity) {
1753 if (!sign && isSigned)
1754 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1755 else if (!sign && !isSigned)
1756 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1757 else if (sign && isSigned) {
1758 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1759 APInt::tcShiftLeft(parts, partsCount, width-1);
1760 } else // sign && !isSigned
1761 APInt::tcSet(parts, 0, partsCount);
1764 if (category == fcZero) {
1765 APInt::tcSet(parts, 0, partsCount);
1769 /* Shift the bit pattern so the fraction is lost. */
1772 bits = (int) semantics->precision - 1 - exponent;
1775 lost_fraction = tmp.shiftSignificandRight(bits);
1777 if ((unsigned) -bits >= semantics->precision) {
1778 // Unrepresentably large.
1779 if (!sign && isSigned)
1780 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1781 else if (!sign && !isSigned)
1782 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1783 else if (sign && isSigned) {
1784 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1785 APInt::tcShiftLeft(parts, partsCount, width-1);
1786 } else // sign && !isSigned
1787 APInt::tcSet(parts, 0, partsCount);
1788 return (opStatus)(opOverflow | opInexact);
1790 tmp.shiftSignificandLeft(-bits);
1791 lost_fraction = lfExactlyZero;
1794 if(lost_fraction != lfExactlyZero
1795 && tmp.roundAwayFromZero(rounding_mode, lost_fraction, 0))
1796 tmp.incrementSignificand();
1798 msb = tmp.significandMSB();
1800 /* Negative numbers cannot be represented as unsigned. */
1801 if(!isSigned && tmp.sign && msb != -1U)
1804 /* It takes exponent + 1 bits to represent the truncated floating
1805 point number without its sign. We lose a bit for the sign, but
1806 the maximally negative integer is a special case. */
1807 if(msb + 1 > width) /* !! Not same as msb >= width !! */
1810 if(isSigned && msb + 1 == width
1811 && (!tmp.sign || tmp.significandLSB() != msb))
1814 APInt::tcAssign(parts, tmp.significandParts(), partsCount);
1817 APInt::tcNegate(parts, partsCount);
1819 if(lost_fraction == lfExactlyZero)
1825 /* Convert an unsigned integer SRC to a floating point number,
1826 rounding according to ROUNDING_MODE. The sign of the floating
1827 point number is not modified. */
1829 APFloat::convertFromUnsignedParts(const integerPart *src,
1830 unsigned int srcCount,
1831 roundingMode rounding_mode)
1833 unsigned int omsb, precision, dstCount;
1835 lostFraction lost_fraction;
1837 assertArithmeticOK(*semantics);
1838 category = fcNormal;
1839 omsb = APInt::tcMSB(src, srcCount) + 1;
1840 dst = significandParts();
1841 dstCount = partCount();
1842 precision = semantics->precision;
1844 /* We want the most significant PRECISON bits of SRC. There may not
1845 be that many; extract what we can. */
1846 if (precision <= omsb) {
1847 exponent = omsb - 1;
1848 lost_fraction = lostFractionThroughTruncation(src, srcCount,
1850 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
1852 exponent = precision - 1;
1853 lost_fraction = lfExactlyZero;
1854 APInt::tcExtract(dst, dstCount, src, omsb, 0);
1857 return normalize(rounding_mode, lost_fraction);
1860 /* Convert a two's complement integer SRC to a floating point number,
1861 rounding according to ROUNDING_MODE. ISSIGNED is true if the
1862 integer is signed, in which case it must be sign-extended. */
1864 APFloat::convertFromSignExtendedInteger(const integerPart *src,
1865 unsigned int srcCount,
1867 roundingMode rounding_mode)
1871 assertArithmeticOK(*semantics);
1873 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
1876 /* If we're signed and negative negate a copy. */
1878 copy = new integerPart[srcCount];
1879 APInt::tcAssign(copy, src, srcCount);
1880 APInt::tcNegate(copy, srcCount);
1881 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
1885 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
1891 /* FIXME: should this just take a const APInt reference? */
1893 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
1894 unsigned int width, bool isSigned,
1895 roundingMode rounding_mode)
1897 unsigned int partCount = partCountForBits(width);
1898 APInt api = APInt(width, partCount, parts);
1901 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1906 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
1910 APFloat::convertFromHexadecimalString(const char *p,
1911 roundingMode rounding_mode)
1913 lostFraction lost_fraction;
1914 integerPart *significand;
1915 unsigned int bitPos, partsCount;
1916 const char *dot, *firstSignificantDigit;
1920 category = fcNormal;
1922 significand = significandParts();
1923 partsCount = partCount();
1924 bitPos = partsCount * integerPartWidth;
1926 /* Skip leading zeroes and any (hexa)decimal point. */
1927 p = skipLeadingZeroesAndAnyDot(p, &dot);
1928 firstSignificantDigit = p;
1931 integerPart hex_value;
1938 hex_value = hexDigitValue(*p);
1939 if(hex_value == -1U) {
1940 lost_fraction = lfExactlyZero;
1946 /* Store the number whilst 4-bit nibbles remain. */
1949 hex_value <<= bitPos % integerPartWidth;
1950 significand[bitPos / integerPartWidth] |= hex_value;
1952 lost_fraction = trailingHexadecimalFraction(p, hex_value);
1953 while(hexDigitValue(*p) != -1U)
1959 /* Hex floats require an exponent but not a hexadecimal point. */
1960 assert(*p == 'p' || *p == 'P');
1962 /* Ignore the exponent if we are zero. */
1963 if(p != firstSignificantDigit) {
1966 /* Implicit hexadecimal point? */
1970 /* Calculate the exponent adjustment implicit in the number of
1971 significant digits. */
1972 expAdjustment = dot - firstSignificantDigit;
1973 if(expAdjustment < 0)
1975 expAdjustment = expAdjustment * 4 - 1;
1977 /* Adjust for writing the significand starting at the most
1978 significant nibble. */
1979 expAdjustment += semantics->precision;
1980 expAdjustment -= partsCount * integerPartWidth;
1982 /* Adjust for the given exponent. */
1983 exponent = totalExponent(p, expAdjustment);
1986 return normalize(rounding_mode, lost_fraction);
1990 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
1991 unsigned sigPartCount, int exp,
1992 roundingMode rounding_mode)
1994 unsigned int parts, pow5PartCount;
1995 fltSemantics calcSemantics = { 32767, -32767, 0, true };
1996 integerPart pow5Parts[maxPowerOfFiveParts];
1999 isNearest = (rounding_mode == rmNearestTiesToEven
2000 || rounding_mode == rmNearestTiesToAway);
2002 parts = partCountForBits(semantics->precision + 11);
2004 /* Calculate pow(5, abs(exp)). */
2005 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2007 for (;; parts *= 2) {
2008 opStatus sigStatus, powStatus;
2009 unsigned int excessPrecision, truncatedBits;
2011 calcSemantics.precision = parts * integerPartWidth - 1;
2012 excessPrecision = calcSemantics.precision - semantics->precision;
2013 truncatedBits = excessPrecision;
2015 APFloat decSig(calcSemantics, fcZero, sign);
2016 APFloat pow5(calcSemantics, fcZero, false);
2018 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2019 rmNearestTiesToEven);
2020 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2021 rmNearestTiesToEven);
2022 /* Add exp, as 10^n = 5^n * 2^n. */
2023 decSig.exponent += exp;
2025 lostFraction calcLostFraction;
2026 integerPart HUerr, HUdistance, powHUerr;
2029 /* multiplySignificand leaves the precision-th bit set to 1. */
2030 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2031 powHUerr = powStatus != opOK;
2033 calcLostFraction = decSig.divideSignificand(pow5);
2034 /* Denormal numbers have less precision. */
2035 if (decSig.exponent < semantics->minExponent) {
2036 excessPrecision += (semantics->minExponent - decSig.exponent);
2037 truncatedBits = excessPrecision;
2038 if (excessPrecision > calcSemantics.precision)
2039 excessPrecision = calcSemantics.precision;
2041 /* Extra half-ulp lost in reciprocal of exponent. */
2042 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0: 2;
2045 /* Both multiplySignificand and divideSignificand return the
2046 result with the integer bit set. */
2047 assert (APInt::tcExtractBit
2048 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2050 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2052 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2053 excessPrecision, isNearest);
2055 /* Are we guaranteed to round correctly if we truncate? */
2056 if (HUdistance >= HUerr) {
2057 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2058 calcSemantics.precision - excessPrecision,
2060 /* Take the exponent of decSig. If we tcExtract-ed less bits
2061 above we must adjust our exponent to compensate for the
2062 implicit right shift. */
2063 exponent = (decSig.exponent + semantics->precision
2064 - (calcSemantics.precision - excessPrecision));
2065 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2068 return normalize(rounding_mode, calcLostFraction);
2074 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
2079 /* Scan the text. */
2080 interpretDecimal(p, &D);
2082 if (*D.firstSigDigit == '0') {
2086 integerPart *decSignificand;
2087 unsigned int partCount;
2089 /* A tight upper bound on number of bits required to hold an
2090 N-digit decimal integer is N * 256 / 77. Allocate enough space
2091 to hold the full significand, and an extra part required by
2093 partCount = (D.lastSigDigit - D.firstSigDigit) + 1;
2094 partCount = partCountForBits(1 + 256 * partCount / 77);
2095 decSignificand = new integerPart[partCount + 1];
2098 /* Convert to binary efficiently - we do almost all multiplication
2099 in an integerPart. When this would overflow do we do a single
2100 bignum multiplication, and then revert again to multiplication
2101 in an integerPart. */
2103 integerPart decValue, val, multiplier;
2112 decValue = decDigitValue(*p++);
2114 val = val * 10 + decValue;
2115 /* The maximum number that can be multiplied by ten with any
2116 digit added without overflowing an integerPart. */
2117 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2119 /* Multiply out the current part. */
2120 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2121 partCount, partCount + 1, false);
2123 /* If we used another part (likely but not guaranteed), increase
2125 if (decSignificand[partCount])
2127 } while (p <= D.lastSigDigit);
2129 category = fcNormal;
2130 fs = roundSignificandWithExponent(decSignificand, partCount,
2131 D.exponent, rounding_mode);
2133 delete [] decSignificand;
2140 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2142 assertArithmeticOK(*semantics);
2144 /* Handle a leading minus sign. */
2150 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2151 return convertFromHexadecimalString(p + 2, rounding_mode);
2153 return convertFromDecimalString(p, rounding_mode);
2156 /* Write out a hexadecimal representation of the floating point value
2157 to DST, which must be of sufficient size, in the C99 form
2158 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2159 excluding the terminating NUL.
2161 If UPPERCASE, the output is in upper case, otherwise in lower case.
2163 HEXDIGITS digits appear altogether, rounding the value if
2164 necessary. If HEXDIGITS is 0, the minimal precision to display the
2165 number precisely is used instead. If nothing would appear after
2166 the decimal point it is suppressed.
2168 The decimal exponent is always printed and has at least one digit.
2169 Zero values display an exponent of zero. Infinities and NaNs
2170 appear as "infinity" or "nan" respectively.
2172 The above rules are as specified by C99. There is ambiguity about
2173 what the leading hexadecimal digit should be. This implementation
2174 uses whatever is necessary so that the exponent is displayed as
2175 stored. This implies the exponent will fall within the IEEE format
2176 range, and the leading hexadecimal digit will be 0 (for denormals),
2177 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2178 any other digits zero).
2181 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2182 bool upperCase, roundingMode rounding_mode) const
2186 assertArithmeticOK(*semantics);
2194 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2195 dst += sizeof infinityL - 1;
2199 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2200 dst += sizeof NaNU - 1;
2205 *dst++ = upperCase ? 'X': 'x';
2207 if (hexDigits > 1) {
2209 memset (dst, '0', hexDigits - 1);
2210 dst += hexDigits - 1;
2212 *dst++ = upperCase ? 'P': 'p';
2217 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2226 /* Does the hard work of outputting the correctly rounded hexadecimal
2227 form of a normal floating point number with the specified number of
2228 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2229 digits necessary to print the value precisely is output. */
2231 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2233 roundingMode rounding_mode) const
2235 unsigned int count, valueBits, shift, partsCount, outputDigits;
2236 const char *hexDigitChars;
2237 const integerPart *significand;
2242 *dst++ = upperCase ? 'X': 'x';
2245 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2247 significand = significandParts();
2248 partsCount = partCount();
2250 /* +3 because the first digit only uses the single integer bit, so
2251 we have 3 virtual zero most-significant-bits. */
2252 valueBits = semantics->precision + 3;
2253 shift = integerPartWidth - valueBits % integerPartWidth;
2255 /* The natural number of digits required ignoring trailing
2256 insignificant zeroes. */
2257 outputDigits = (valueBits - significandLSB () + 3) / 4;
2259 /* hexDigits of zero means use the required number for the
2260 precision. Otherwise, see if we are truncating. If we are,
2261 find out if we need to round away from zero. */
2263 if (hexDigits < outputDigits) {
2264 /* We are dropping non-zero bits, so need to check how to round.
2265 "bits" is the number of dropped bits. */
2267 lostFraction fraction;
2269 bits = valueBits - hexDigits * 4;
2270 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2271 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2273 outputDigits = hexDigits;
2276 /* Write the digits consecutively, and start writing in the location
2277 of the hexadecimal point. We move the most significant digit
2278 left and add the hexadecimal point later. */
2281 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2283 while (outputDigits && count) {
2286 /* Put the most significant integerPartWidth bits in "part". */
2287 if (--count == partsCount)
2288 part = 0; /* An imaginary higher zero part. */
2290 part = significand[count] << shift;
2293 part |= significand[count - 1] >> (integerPartWidth - shift);
2295 /* Convert as much of "part" to hexdigits as we can. */
2296 unsigned int curDigits = integerPartWidth / 4;
2298 if (curDigits > outputDigits)
2299 curDigits = outputDigits;
2300 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2301 outputDigits -= curDigits;
2307 /* Note that hexDigitChars has a trailing '0'. */
2310 *q = hexDigitChars[hexDigitValue (*q) + 1];
2311 } while (*q == '0');
2314 /* Add trailing zeroes. */
2315 memset (dst, '0', outputDigits);
2316 dst += outputDigits;
2319 /* Move the most significant digit to before the point, and if there
2320 is something after the decimal point add it. This must come
2321 after rounding above. */
2328 /* Finally output the exponent. */
2329 *dst++ = upperCase ? 'P': 'p';
2331 return writeSignedDecimal (dst, exponent);
2334 // For good performance it is desirable for different APFloats
2335 // to produce different integers.
2337 APFloat::getHashValue() const
2339 if (category==fcZero) return sign<<8 | semantics->precision ;
2340 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2341 else if (category==fcNaN) return 1<<10 | semantics->precision;
2343 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2344 const integerPart* p = significandParts();
2345 for (int i=partCount(); i>0; i--, p++)
2346 hash ^= ((uint32_t)*p) ^ (*p)>>32;
2351 // Conversion from APFloat to/from host float/double. It may eventually be
2352 // possible to eliminate these and have everybody deal with APFloats, but that
2353 // will take a while. This approach will not easily extend to long double.
2354 // Current implementation requires integerPartWidth==64, which is correct at
2355 // the moment but could be made more general.
2357 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2358 // the actual IEEE respresentations. We compensate for that here.
2361 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2363 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended);
2364 assert (partCount()==2);
2366 uint64_t myexponent, mysignificand;
2368 if (category==fcNormal) {
2369 myexponent = exponent+16383; //bias
2370 mysignificand = significandParts()[0];
2371 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2372 myexponent = 0; // denormal
2373 } else if (category==fcZero) {
2376 } else if (category==fcInfinity) {
2377 myexponent = 0x7fff;
2378 mysignificand = 0x8000000000000000ULL;
2380 assert(category == fcNaN && "Unknown category");
2381 myexponent = 0x7fff;
2382 mysignificand = significandParts()[0];
2386 words[0] = (((uint64_t)sign & 1) << 63) |
2387 ((myexponent & 0x7fff) << 48) |
2388 ((mysignificand >>16) & 0xffffffffffffLL);
2389 words[1] = mysignificand & 0xffff;
2390 return APInt(80, 2, words);
2394 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2396 assert(semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble);
2397 assert (partCount()==2);
2399 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2401 if (category==fcNormal) {
2402 myexponent = exponent + 1023; //bias
2403 myexponent2 = exponent2 + 1023;
2404 mysignificand = significandParts()[0];
2405 mysignificand2 = significandParts()[1];
2406 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2407 myexponent = 0; // denormal
2408 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2409 myexponent2 = 0; // denormal
2410 } else if (category==fcZero) {
2415 } else if (category==fcInfinity) {
2421 assert(category == fcNaN && "Unknown category");
2423 mysignificand = significandParts()[0];
2424 myexponent2 = exponent2;
2425 mysignificand2 = significandParts()[1];
2429 words[0] = (((uint64_t)sign & 1) << 63) |
2430 ((myexponent & 0x7ff) << 52) |
2431 (mysignificand & 0xfffffffffffffLL);
2432 words[1] = (((uint64_t)sign2 & 1) << 63) |
2433 ((myexponent2 & 0x7ff) << 52) |
2434 (mysignificand2 & 0xfffffffffffffLL);
2435 return APInt(128, 2, words);
2439 APFloat::convertDoubleAPFloatToAPInt() const
2441 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2442 assert (partCount()==1);
2444 uint64_t myexponent, mysignificand;
2446 if (category==fcNormal) {
2447 myexponent = exponent+1023; //bias
2448 mysignificand = *significandParts();
2449 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2450 myexponent = 0; // denormal
2451 } else if (category==fcZero) {
2454 } else if (category==fcInfinity) {
2458 assert(category == fcNaN && "Unknown category!");
2460 mysignificand = *significandParts();
2463 return APInt(64, (((((uint64_t)sign & 1) << 63) |
2464 ((myexponent & 0x7ff) << 52) |
2465 (mysignificand & 0xfffffffffffffLL))));
2469 APFloat::convertFloatAPFloatToAPInt() const
2471 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2472 assert (partCount()==1);
2474 uint32_t myexponent, mysignificand;
2476 if (category==fcNormal) {
2477 myexponent = exponent+127; //bias
2478 mysignificand = *significandParts();
2479 if (myexponent == 1 && !(mysignificand & 0x400000))
2480 myexponent = 0; // denormal
2481 } else if (category==fcZero) {
2484 } else if (category==fcInfinity) {
2488 assert(category == fcNaN && "Unknown category!");
2490 mysignificand = *significandParts();
2493 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2494 (mysignificand & 0x7fffff)));
2497 // This function creates an APInt that is just a bit map of the floating
2498 // point constant as it would appear in memory. It is not a conversion,
2499 // and treating the result as a normal integer is unlikely to be useful.
2502 APFloat::convertToAPInt() const
2504 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle)
2505 return convertFloatAPFloatToAPInt();
2507 if (semantics == (const llvm::fltSemantics* const)&IEEEdouble)
2508 return convertDoubleAPFloatToAPInt();
2510 if (semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble)
2511 return convertPPCDoubleDoubleAPFloatToAPInt();
2513 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended &&
2515 return convertF80LongDoubleAPFloatToAPInt();
2519 APFloat::convertToFloat() const
2521 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle);
2522 APInt api = convertToAPInt();
2523 return api.bitsToFloat();
2527 APFloat::convertToDouble() const
2529 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble);
2530 APInt api = convertToAPInt();
2531 return api.bitsToDouble();
2534 /// Integer bit is explicit in this format. Current Intel book does not
2535 /// define meaning of:
2536 /// exponent = all 1's, integer bit not set.
2537 /// exponent = 0, integer bit set. (formerly "psuedodenormals")
2538 /// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals")
2540 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2542 assert(api.getBitWidth()==80);
2543 uint64_t i1 = api.getRawData()[0];
2544 uint64_t i2 = api.getRawData()[1];
2545 uint64_t myexponent = (i1 >> 48) & 0x7fff;
2546 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
2549 initialize(&APFloat::x87DoubleExtended);
2550 assert(partCount()==2);
2553 if (myexponent==0 && mysignificand==0) {
2554 // exponent, significand meaningless
2556 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2557 // exponent, significand meaningless
2558 category = fcInfinity;
2559 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2560 // exponent meaningless
2562 significandParts()[0] = mysignificand;
2563 significandParts()[1] = 0;
2565 category = fcNormal;
2566 exponent = myexponent - 16383;
2567 significandParts()[0] = mysignificand;
2568 significandParts()[1] = 0;
2569 if (myexponent==0) // denormal
2575 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2577 assert(api.getBitWidth()==128);
2578 uint64_t i1 = api.getRawData()[0];
2579 uint64_t i2 = api.getRawData()[1];
2580 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2581 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2582 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2583 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2585 initialize(&APFloat::PPCDoubleDouble);
2586 assert(partCount()==2);
2590 if (myexponent==0 && mysignificand==0) {
2591 // exponent, significand meaningless
2592 // exponent2 and significand2 are required to be 0; we don't check
2594 } else if (myexponent==0x7ff && mysignificand==0) {
2595 // exponent, significand meaningless
2596 // exponent2 and significand2 are required to be 0; we don't check
2597 category = fcInfinity;
2598 } else if (myexponent==0x7ff && mysignificand!=0) {
2599 // exponent meaningless. So is the whole second word, but keep it
2602 exponent2 = myexponent2;
2603 significandParts()[0] = mysignificand;
2604 significandParts()[1] = mysignificand2;
2606 category = fcNormal;
2607 // Note there is no category2; the second word is treated as if it is
2608 // fcNormal, although it might be something else considered by itself.
2609 exponent = myexponent - 1023;
2610 exponent2 = myexponent2 - 1023;
2611 significandParts()[0] = mysignificand;
2612 significandParts()[1] = mysignificand2;
2613 if (myexponent==0) // denormal
2616 significandParts()[0] |= 0x10000000000000LL; // integer bit
2620 significandParts()[1] |= 0x10000000000000LL; // integer bit
2625 APFloat::initFromDoubleAPInt(const APInt &api)
2627 assert(api.getBitWidth()==64);
2628 uint64_t i = *api.getRawData();
2629 uint64_t myexponent = (i >> 52) & 0x7ff;
2630 uint64_t mysignificand = i & 0xfffffffffffffLL;
2632 initialize(&APFloat::IEEEdouble);
2633 assert(partCount()==1);
2636 if (myexponent==0 && mysignificand==0) {
2637 // exponent, significand meaningless
2639 } else if (myexponent==0x7ff && mysignificand==0) {
2640 // exponent, significand meaningless
2641 category = fcInfinity;
2642 } else if (myexponent==0x7ff && mysignificand!=0) {
2643 // exponent meaningless
2645 *significandParts() = mysignificand;
2647 category = fcNormal;
2648 exponent = myexponent - 1023;
2649 *significandParts() = mysignificand;
2650 if (myexponent==0) // denormal
2653 *significandParts() |= 0x10000000000000LL; // integer bit
2658 APFloat::initFromFloatAPInt(const APInt & api)
2660 assert(api.getBitWidth()==32);
2661 uint32_t i = (uint32_t)*api.getRawData();
2662 uint32_t myexponent = (i >> 23) & 0xff;
2663 uint32_t mysignificand = i & 0x7fffff;
2665 initialize(&APFloat::IEEEsingle);
2666 assert(partCount()==1);
2669 if (myexponent==0 && mysignificand==0) {
2670 // exponent, significand meaningless
2672 } else if (myexponent==0xff && mysignificand==0) {
2673 // exponent, significand meaningless
2674 category = fcInfinity;
2675 } else if (myexponent==0xff && mysignificand!=0) {
2676 // sign, exponent, significand meaningless
2678 *significandParts() = mysignificand;
2680 category = fcNormal;
2681 exponent = myexponent - 127; //bias
2682 *significandParts() = mysignificand;
2683 if (myexponent==0) // denormal
2686 *significandParts() |= 0x800000; // integer bit
2690 /// Treat api as containing the bits of a floating point number. Currently
2691 /// we infer the floating point type from the size of the APInt. The
2692 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2693 /// when the size is anything else).
2695 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2697 if (api.getBitWidth() == 32)
2698 return initFromFloatAPInt(api);
2699 else if (api.getBitWidth()==64)
2700 return initFromDoubleAPInt(api);
2701 else if (api.getBitWidth()==80)
2702 return initFromF80LongDoubleAPInt(api);
2703 else if (api.getBitWidth()==128 && !isIEEE)
2704 return initFromPPCDoubleDoubleAPInt(api);
2709 APFloat::APFloat(const APInt& api, bool isIEEE)
2711 initFromAPInt(api, isIEEE);
2714 APFloat::APFloat(float f)
2716 APInt api = APInt(32, 0);
2717 initFromAPInt(api.floatToBits(f));
2720 APFloat::APFloat(double d)
2722 APInt api = APInt(64, 0);
2723 initFromAPInt(api.doubleToBits(d));