1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Neil Booth and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
17 #include "llvm/ADT/APFloat.h"
18 #include "llvm/Support/MathExtras.h"
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
30 /* Represents floating point arithmetic semantics. */
32 /* The largest E such that 2^E is representable; this matches the
33 definition of IEEE 754. */
34 exponent_t maxExponent;
36 /* The smallest E such that 2^E is a normalized number; this
37 matches the definition of IEEE 754. */
38 exponent_t minExponent;
40 /* Number of bits in the significand. This includes the integer
42 unsigned int precision;
44 /* True if arithmetic is supported. */
45 unsigned int arithmeticOK;
48 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
49 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
50 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
51 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
52 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
54 // The PowerPC format consists of two doubles. It does not map cleanly
55 // onto the usual format above. For now only storage of constants of
56 // this type is supported, no arithmetic.
57 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
59 /* A tight upper bound on number of parts required to hold the value
62 power * 815 / (351 * integerPartWidth) + 1
64 However, whilst the result may require only this many parts,
65 because we are multiplying two values to get it, the
66 multiplication may require an extra part with the excess part
67 being zero (consider the trivial case of 1 * 1, tcFullMultiply
68 requires two parts to hold the single-part result). So we add an
69 extra one to guarantee enough space whilst multiplying. */
70 const unsigned int maxExponent = 16383;
71 const unsigned int maxPrecision = 113;
72 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
73 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
74 / (351 * integerPartWidth));
77 /* Put a bunch of private, handy routines in an anonymous namespace. */
81 partCountForBits(unsigned int bits)
83 return ((bits) + integerPartWidth - 1) / integerPartWidth;
86 /* Returns 0U-9U. Return values >= 10U are not digits. */
88 decDigitValue(unsigned int c)
94 hexDigitValue(unsigned int c)
114 assertArithmeticOK(const llvm::fltSemantics &semantics) {
115 assert(semantics.arithmeticOK
116 && "Compile-time arithmetic does not support these semantics");
119 /* Return the value of a decimal exponent of the form
122 If the exponent overflows, returns a large exponent with the
125 readExponent(const char *p)
128 unsigned int absExponent;
129 const unsigned int overlargeExponent = 24000; /* FIXME. */
131 isNegative = (*p == '-');
132 if (*p == '-' || *p == '+')
135 absExponent = decDigitValue(*p++);
136 assert (absExponent < 10U);
141 value = decDigitValue(*p);
146 value += absExponent * 10;
147 if (absExponent >= overlargeExponent) {
148 absExponent = overlargeExponent;
155 return -(int) absExponent;
157 return (int) absExponent;
160 /* This is ugly and needs cleaning up, but I don't immediately see
161 how whilst remaining safe. */
163 totalExponent(const char *p, int exponentAdjustment)
165 integerPart unsignedExponent;
166 bool negative, overflow;
169 /* Move past the exponent letter and sign to the digits. */
171 negative = *p == '-';
172 if(*p == '-' || *p == '+')
175 unsignedExponent = 0;
180 value = decDigitValue(*p);
185 unsignedExponent = unsignedExponent * 10 + value;
186 if(unsignedExponent > 65535)
190 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
194 exponent = unsignedExponent;
196 exponent = -exponent;
197 exponent += exponentAdjustment;
198 if(exponent > 65535 || exponent < -65536)
203 exponent = negative ? -65536: 65535;
209 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
224 /* Given a normal decimal floating point number of the form
228 where the decimal point and exponent are optional, fill out the
229 structure D. Exponent is appropriate if the significand is
230 treated as an integer, and normalizedExponent if the significand
231 is taken to have the decimal point after a single leading
234 If the value is zero, V->firstSigDigit points to a zero, and the
235 return exponent is zero.
238 const char *firstSigDigit;
239 const char *lastSigDigit;
241 int normalizedExponent;
245 interpretDecimal(const char *p, decimalInfo *D)
249 p = skipLeadingZeroesAndAnyDot (p, &dot);
251 D->firstSigDigit = p;
253 D->normalizedExponent = 0;
260 if (decDigitValue(*p) >= 10U)
265 /* If number is all zerooes accept any exponent. */
266 if (p != D->firstSigDigit) {
267 if (*p == 'e' || *p == 'E')
268 D->exponent = readExponent(p + 1);
270 /* Implied decimal point? */
274 /* Drop insignificant trailing zeroes. */
281 /* Adjust the exponents for any decimal point. */
282 D->exponent += (dot - p) - (dot > p);
283 D->normalizedExponent = (D->exponent + (p - D->firstSigDigit)
284 - (dot > D->firstSigDigit && dot < p));
290 /* Return the trailing fraction of a hexadecimal number.
291 DIGITVALUE is the first hex digit of the fraction, P points to
294 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
296 unsigned int hexDigit;
298 /* If the first trailing digit isn't 0 or 8 we can work out the
299 fraction immediately. */
301 return lfMoreThanHalf;
302 else if(digitValue < 8 && digitValue > 0)
303 return lfLessThanHalf;
305 /* Otherwise we need to find the first non-zero digit. */
309 hexDigit = hexDigitValue(*p);
311 /* If we ran off the end it is exactly zero or one-half, otherwise
314 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
316 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
319 /* Return the fraction lost were a bignum truncated losing the least
320 significant BITS bits. */
322 lostFractionThroughTruncation(const integerPart *parts,
323 unsigned int partCount,
328 lsb = APInt::tcLSB(parts, partCount);
330 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
332 return lfExactlyZero;
334 return lfExactlyHalf;
335 if(bits <= partCount * integerPartWidth
336 && APInt::tcExtractBit(parts, bits - 1))
337 return lfMoreThanHalf;
339 return lfLessThanHalf;
342 /* Shift DST right BITS bits noting lost fraction. */
344 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
346 lostFraction lost_fraction;
348 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
350 APInt::tcShiftRight(dst, parts, bits);
352 return lost_fraction;
355 /* Combine the effect of two lost fractions. */
357 combineLostFractions(lostFraction moreSignificant,
358 lostFraction lessSignificant)
360 if(lessSignificant != lfExactlyZero) {
361 if(moreSignificant == lfExactlyZero)
362 moreSignificant = lfLessThanHalf;
363 else if(moreSignificant == lfExactlyHalf)
364 moreSignificant = lfMoreThanHalf;
367 return moreSignificant;
370 /* The error from the true value, in half-ulps, on multiplying two
371 floating point numbers, which differ from the value they
372 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
373 than the returned value.
375 See "How to Read Floating Point Numbers Accurately" by William D
378 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
380 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
382 if (HUerr1 + HUerr2 == 0)
383 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
385 return inexactMultiply + 2 * (HUerr1 + HUerr2);
388 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
389 when the least significant BITS are truncated. BITS cannot be
392 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
394 unsigned int count, partBits;
395 integerPart part, boundary;
400 count = bits / integerPartWidth;
401 partBits = bits % integerPartWidth + 1;
403 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
406 boundary = (integerPart) 1 << (partBits - 1);
411 if (part - boundary <= boundary - part)
412 return part - boundary;
414 return boundary - part;
417 if (part == boundary) {
420 return ~(integerPart) 0; /* A lot. */
423 } else if (part == boundary - 1) {
426 return ~(integerPart) 0; /* A lot. */
431 return ~(integerPart) 0; /* A lot. */
434 /* Place pow(5, power) in DST, and return the number of parts used.
435 DST must be at least one part larger than size of the answer. */
437 powerOf5(integerPart *dst, unsigned int power)
439 static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
441 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
442 static unsigned int partsCount[16] = { 1 };
444 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
447 assert(power <= maxExponent);
452 *p1 = firstEightPowers[power & 7];
458 for (unsigned int n = 0; power; power >>= 1, n++) {
463 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
465 pc = partsCount[n - 1];
466 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
468 if (pow5[pc - 1] == 0)
476 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
478 if (p2[result - 1] == 0)
481 /* Now result is in p1 with partsCount parts and p2 is scratch
483 tmp = p1, p1 = p2, p2 = tmp;
490 APInt::tcAssign(dst, p1, result);
495 /* Zero at the end to avoid modular arithmetic when adding one; used
496 when rounding up during hexadecimal output. */
497 static const char hexDigitsLower[] = "0123456789abcdef0";
498 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
499 static const char infinityL[] = "infinity";
500 static const char infinityU[] = "INFINITY";
501 static const char NaNL[] = "nan";
502 static const char NaNU[] = "NAN";
504 /* Write out an integerPart in hexadecimal, starting with the most
505 significant nibble. Write out exactly COUNT hexdigits, return
508 partAsHex (char *dst, integerPart part, unsigned int count,
509 const char *hexDigitChars)
511 unsigned int result = count;
513 assert (count != 0 && count <= integerPartWidth / 4);
515 part >>= (integerPartWidth - 4 * count);
517 dst[count] = hexDigitChars[part & 0xf];
524 /* Write out an unsigned decimal integer. */
526 writeUnsignedDecimal (char *dst, unsigned int n)
542 /* Write out a signed decimal integer. */
544 writeSignedDecimal (char *dst, int value)
548 dst = writeUnsignedDecimal(dst, -(unsigned) value);
550 dst = writeUnsignedDecimal(dst, value);
558 APFloat::initialize(const fltSemantics *ourSemantics)
562 semantics = ourSemantics;
565 significand.parts = new integerPart[count];
569 APFloat::freeSignificand()
572 delete [] significand.parts;
576 APFloat::assign(const APFloat &rhs)
578 assert(semantics == rhs.semantics);
581 category = rhs.category;
582 exponent = rhs.exponent;
584 exponent2 = rhs.exponent2;
585 if(category == fcNormal || category == fcNaN)
586 copySignificand(rhs);
590 APFloat::copySignificand(const APFloat &rhs)
592 assert(category == fcNormal || category == fcNaN);
593 assert(rhs.partCount() >= partCount());
595 APInt::tcAssign(significandParts(), rhs.significandParts(),
599 /* Make this number a NaN, with an arbitrary but deterministic value
600 for the significand. */
602 APFloat::makeNaN(void)
605 APInt::tcSet(significandParts(), ~0U, partCount());
609 APFloat::operator=(const APFloat &rhs)
612 if(semantics != rhs.semantics) {
614 initialize(rhs.semantics);
623 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
626 if (semantics != rhs.semantics ||
627 category != rhs.category ||
630 if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble &&
633 if (category==fcZero || category==fcInfinity)
635 else if (category==fcNormal && exponent!=rhs.exponent)
637 else if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble &&
638 exponent2!=rhs.exponent2)
642 const integerPart* p=significandParts();
643 const integerPart* q=rhs.significandParts();
644 for (; i>0; i--, p++, q++) {
652 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
654 assertArithmeticOK(ourSemantics);
655 initialize(&ourSemantics);
658 exponent = ourSemantics.precision - 1;
659 significandParts()[0] = value;
660 normalize(rmNearestTiesToEven, lfExactlyZero);
663 APFloat::APFloat(const fltSemantics &ourSemantics,
664 fltCategory ourCategory, bool negative)
666 assertArithmeticOK(ourSemantics);
667 initialize(&ourSemantics);
668 category = ourCategory;
670 if(category == fcNormal)
672 else if (ourCategory == fcNaN)
676 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
678 assertArithmeticOK(ourSemantics);
679 initialize(&ourSemantics);
680 convertFromString(text, rmNearestTiesToEven);
683 APFloat::APFloat(const APFloat &rhs)
685 initialize(rhs.semantics);
695 APFloat::partCount() const
697 return partCountForBits(semantics->precision + 1);
701 APFloat::semanticsPrecision(const fltSemantics &semantics)
703 return semantics.precision;
707 APFloat::significandParts() const
709 return const_cast<APFloat *>(this)->significandParts();
713 APFloat::significandParts()
715 assert(category == fcNormal || category == fcNaN);
718 return significand.parts;
720 return &significand.part;
724 APFloat::zeroSignificand()
727 APInt::tcSet(significandParts(), 0, partCount());
730 /* Increment an fcNormal floating point number's significand. */
732 APFloat::incrementSignificand()
736 carry = APInt::tcIncrement(significandParts(), partCount());
738 /* Our callers should never cause us to overflow. */
742 /* Add the significand of the RHS. Returns the carry flag. */
744 APFloat::addSignificand(const APFloat &rhs)
748 parts = significandParts();
750 assert(semantics == rhs.semantics);
751 assert(exponent == rhs.exponent);
753 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
756 /* Subtract the significand of the RHS with a borrow flag. Returns
759 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
763 parts = significandParts();
765 assert(semantics == rhs.semantics);
766 assert(exponent == rhs.exponent);
768 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
772 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
773 on to the full-precision result of the multiplication. Returns the
776 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
778 unsigned int omsb; // One, not zero, based MSB.
779 unsigned int partsCount, newPartsCount, precision;
780 integerPart *lhsSignificand;
781 integerPart scratch[4];
782 integerPart *fullSignificand;
783 lostFraction lost_fraction;
785 assert(semantics == rhs.semantics);
787 precision = semantics->precision;
788 newPartsCount = partCountForBits(precision * 2);
790 if(newPartsCount > 4)
791 fullSignificand = new integerPart[newPartsCount];
793 fullSignificand = scratch;
795 lhsSignificand = significandParts();
796 partsCount = partCount();
798 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
799 rhs.significandParts(), partsCount, partsCount);
801 lost_fraction = lfExactlyZero;
802 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
803 exponent += rhs.exponent;
806 Significand savedSignificand = significand;
807 const fltSemantics *savedSemantics = semantics;
808 fltSemantics extendedSemantics;
810 unsigned int extendedPrecision;
812 /* Normalize our MSB. */
813 extendedPrecision = precision + precision - 1;
814 if(omsb != extendedPrecision)
816 APInt::tcShiftLeft(fullSignificand, newPartsCount,
817 extendedPrecision - omsb);
818 exponent -= extendedPrecision - omsb;
821 /* Create new semantics. */
822 extendedSemantics = *semantics;
823 extendedSemantics.precision = extendedPrecision;
825 if(newPartsCount == 1)
826 significand.part = fullSignificand[0];
828 significand.parts = fullSignificand;
829 semantics = &extendedSemantics;
831 APFloat extendedAddend(*addend);
832 status = extendedAddend.convert(extendedSemantics, rmTowardZero);
833 assert(status == opOK);
834 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
836 /* Restore our state. */
837 if(newPartsCount == 1)
838 fullSignificand[0] = significand.part;
839 significand = savedSignificand;
840 semantics = savedSemantics;
842 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
845 exponent -= (precision - 1);
847 if(omsb > precision) {
848 unsigned int bits, significantParts;
851 bits = omsb - precision;
852 significantParts = partCountForBits(omsb);
853 lf = shiftRight(fullSignificand, significantParts, bits);
854 lost_fraction = combineLostFractions(lf, lost_fraction);
858 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
860 if(newPartsCount > 4)
861 delete [] fullSignificand;
863 return lost_fraction;
866 /* Multiply the significands of LHS and RHS to DST. */
868 APFloat::divideSignificand(const APFloat &rhs)
870 unsigned int bit, i, partsCount;
871 const integerPart *rhsSignificand;
872 integerPart *lhsSignificand, *dividend, *divisor;
873 integerPart scratch[4];
874 lostFraction lost_fraction;
876 assert(semantics == rhs.semantics);
878 lhsSignificand = significandParts();
879 rhsSignificand = rhs.significandParts();
880 partsCount = partCount();
883 dividend = new integerPart[partsCount * 2];
887 divisor = dividend + partsCount;
889 /* Copy the dividend and divisor as they will be modified in-place. */
890 for(i = 0; i < partsCount; i++) {
891 dividend[i] = lhsSignificand[i];
892 divisor[i] = rhsSignificand[i];
893 lhsSignificand[i] = 0;
896 exponent -= rhs.exponent;
898 unsigned int precision = semantics->precision;
900 /* Normalize the divisor. */
901 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
904 APInt::tcShiftLeft(divisor, partsCount, bit);
907 /* Normalize the dividend. */
908 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
911 APInt::tcShiftLeft(dividend, partsCount, bit);
914 /* Ensure the dividend >= divisor initially for the loop below.
915 Incidentally, this means that the division loop below is
916 guaranteed to set the integer bit to one. */
917 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
919 APInt::tcShiftLeft(dividend, partsCount, 1);
920 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
924 for(bit = precision; bit; bit -= 1) {
925 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
926 APInt::tcSubtract(dividend, divisor, 0, partsCount);
927 APInt::tcSetBit(lhsSignificand, bit - 1);
930 APInt::tcShiftLeft(dividend, partsCount, 1);
933 /* Figure out the lost fraction. */
934 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
937 lost_fraction = lfMoreThanHalf;
939 lost_fraction = lfExactlyHalf;
940 else if(APInt::tcIsZero(dividend, partsCount))
941 lost_fraction = lfExactlyZero;
943 lost_fraction = lfLessThanHalf;
948 return lost_fraction;
952 APFloat::significandMSB() const
954 return APInt::tcMSB(significandParts(), partCount());
958 APFloat::significandLSB() const
960 return APInt::tcLSB(significandParts(), partCount());
963 /* Note that a zero result is NOT normalized to fcZero. */
965 APFloat::shiftSignificandRight(unsigned int bits)
967 /* Our exponent should not overflow. */
968 assert((exponent_t) (exponent + bits) >= exponent);
972 return shiftRight(significandParts(), partCount(), bits);
975 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
977 APFloat::shiftSignificandLeft(unsigned int bits)
979 assert(bits < semantics->precision);
982 unsigned int partsCount = partCount();
984 APInt::tcShiftLeft(significandParts(), partsCount, bits);
987 assert(!APInt::tcIsZero(significandParts(), partsCount));
992 APFloat::compareAbsoluteValue(const APFloat &rhs) const
996 assert(semantics == rhs.semantics);
997 assert(category == fcNormal);
998 assert(rhs.category == fcNormal);
1000 compare = exponent - rhs.exponent;
1002 /* If exponents are equal, do an unsigned bignum comparison of the
1005 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1009 return cmpGreaterThan;
1010 else if(compare < 0)
1016 /* Handle overflow. Sign is preserved. We either become infinity or
1017 the largest finite number. */
1019 APFloat::handleOverflow(roundingMode rounding_mode)
1022 if(rounding_mode == rmNearestTiesToEven
1023 || rounding_mode == rmNearestTiesToAway
1024 || (rounding_mode == rmTowardPositive && !sign)
1025 || (rounding_mode == rmTowardNegative && sign))
1027 category = fcInfinity;
1028 return (opStatus) (opOverflow | opInexact);
1031 /* Otherwise we become the largest finite number. */
1032 category = fcNormal;
1033 exponent = semantics->maxExponent;
1034 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1035 semantics->precision);
1040 /* Returns TRUE if, when truncating the current number, with BIT the
1041 new LSB, with the given lost fraction and rounding mode, the result
1042 would need to be rounded away from zero (i.e., by increasing the
1043 signficand). This routine must work for fcZero of both signs, and
1044 fcNormal numbers. */
1046 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1047 lostFraction lost_fraction,
1048 unsigned int bit) const
1050 /* NaNs and infinities should not have lost fractions. */
1051 assert(category == fcNormal || category == fcZero);
1053 /* Current callers never pass this so we don't handle it. */
1054 assert(lost_fraction != lfExactlyZero);
1056 switch(rounding_mode) {
1060 case rmNearestTiesToAway:
1061 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1063 case rmNearestTiesToEven:
1064 if(lost_fraction == lfMoreThanHalf)
1067 /* Our zeroes don't have a significand to test. */
1068 if(lost_fraction == lfExactlyHalf && category != fcZero)
1069 return APInt::tcExtractBit(significandParts(), bit);
1076 case rmTowardPositive:
1077 return sign == false;
1079 case rmTowardNegative:
1080 return sign == true;
1085 APFloat::normalize(roundingMode rounding_mode,
1086 lostFraction lost_fraction)
1088 unsigned int omsb; /* One, not zero, based MSB. */
1091 if(category != fcNormal)
1094 /* Before rounding normalize the exponent of fcNormal numbers. */
1095 omsb = significandMSB() + 1;
1098 /* OMSB is numbered from 1. We want to place it in the integer
1099 bit numbered PRECISON if possible, with a compensating change in
1101 exponentChange = omsb - semantics->precision;
1103 /* If the resulting exponent is too high, overflow according to
1104 the rounding mode. */
1105 if(exponent + exponentChange > semantics->maxExponent)
1106 return handleOverflow(rounding_mode);
1108 /* Subnormal numbers have exponent minExponent, and their MSB
1109 is forced based on that. */
1110 if(exponent + exponentChange < semantics->minExponent)
1111 exponentChange = semantics->minExponent - exponent;
1113 /* Shifting left is easy as we don't lose precision. */
1114 if(exponentChange < 0) {
1115 assert(lost_fraction == lfExactlyZero);
1117 shiftSignificandLeft(-exponentChange);
1122 if(exponentChange > 0) {
1125 /* Shift right and capture any new lost fraction. */
1126 lf = shiftSignificandRight(exponentChange);
1128 lost_fraction = combineLostFractions(lf, lost_fraction);
1130 /* Keep OMSB up-to-date. */
1131 if(omsb > (unsigned) exponentChange)
1132 omsb -= exponentChange;
1138 /* Now round the number according to rounding_mode given the lost
1141 /* As specified in IEEE 754, since we do not trap we do not report
1142 underflow for exact results. */
1143 if(lost_fraction == lfExactlyZero) {
1144 /* Canonicalize zeroes. */
1151 /* Increment the significand if we're rounding away from zero. */
1152 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1154 exponent = semantics->minExponent;
1156 incrementSignificand();
1157 omsb = significandMSB() + 1;
1159 /* Did the significand increment overflow? */
1160 if(omsb == (unsigned) semantics->precision + 1) {
1161 /* Renormalize by incrementing the exponent and shifting our
1162 significand right one. However if we already have the
1163 maximum exponent we overflow to infinity. */
1164 if(exponent == semantics->maxExponent) {
1165 category = fcInfinity;
1167 return (opStatus) (opOverflow | opInexact);
1170 shiftSignificandRight(1);
1176 /* The normal case - we were and are not denormal, and any
1177 significand increment above didn't overflow. */
1178 if(omsb == semantics->precision)
1181 /* We have a non-zero denormal. */
1182 assert(omsb < semantics->precision);
1184 /* Canonicalize zeroes. */
1188 /* The fcZero case is a denormal that underflowed to zero. */
1189 return (opStatus) (opUnderflow | opInexact);
1193 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1195 switch(convolve(category, rhs.category)) {
1199 case convolve(fcNaN, fcZero):
1200 case convolve(fcNaN, fcNormal):
1201 case convolve(fcNaN, fcInfinity):
1202 case convolve(fcNaN, fcNaN):
1203 case convolve(fcNormal, fcZero):
1204 case convolve(fcInfinity, fcNormal):
1205 case convolve(fcInfinity, fcZero):
1208 case convolve(fcZero, fcNaN):
1209 case convolve(fcNormal, fcNaN):
1210 case convolve(fcInfinity, fcNaN):
1212 copySignificand(rhs);
1215 case convolve(fcNormal, fcInfinity):
1216 case convolve(fcZero, fcInfinity):
1217 category = fcInfinity;
1218 sign = rhs.sign ^ subtract;
1221 case convolve(fcZero, fcNormal):
1223 sign = rhs.sign ^ subtract;
1226 case convolve(fcZero, fcZero):
1227 /* Sign depends on rounding mode; handled by caller. */
1230 case convolve(fcInfinity, fcInfinity):
1231 /* Differently signed infinities can only be validly
1233 if(sign ^ rhs.sign != subtract) {
1240 case convolve(fcNormal, fcNormal):
1245 /* Add or subtract two normal numbers. */
1247 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1250 lostFraction lost_fraction;
1253 /* Determine if the operation on the absolute values is effectively
1254 an addition or subtraction. */
1255 subtract ^= (sign ^ rhs.sign);
1257 /* Are we bigger exponent-wise than the RHS? */
1258 bits = exponent - rhs.exponent;
1260 /* Subtraction is more subtle than one might naively expect. */
1262 APFloat temp_rhs(rhs);
1266 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1267 lost_fraction = lfExactlyZero;
1268 } else if (bits > 0) {
1269 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1270 shiftSignificandLeft(1);
1273 lost_fraction = shiftSignificandRight(-bits - 1);
1274 temp_rhs.shiftSignificandLeft(1);
1279 carry = temp_rhs.subtractSignificand
1280 (*this, lost_fraction != lfExactlyZero);
1281 copySignificand(temp_rhs);
1284 carry = subtractSignificand
1285 (temp_rhs, lost_fraction != lfExactlyZero);
1288 /* Invert the lost fraction - it was on the RHS and
1290 if(lost_fraction == lfLessThanHalf)
1291 lost_fraction = lfMoreThanHalf;
1292 else if(lost_fraction == lfMoreThanHalf)
1293 lost_fraction = lfLessThanHalf;
1295 /* The code above is intended to ensure that no borrow is
1300 APFloat temp_rhs(rhs);
1302 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1303 carry = addSignificand(temp_rhs);
1305 lost_fraction = shiftSignificandRight(-bits);
1306 carry = addSignificand(rhs);
1309 /* We have a guard bit; generating a carry cannot happen. */
1313 return lost_fraction;
1317 APFloat::multiplySpecials(const APFloat &rhs)
1319 switch(convolve(category, rhs.category)) {
1323 case convolve(fcNaN, fcZero):
1324 case convolve(fcNaN, fcNormal):
1325 case convolve(fcNaN, fcInfinity):
1326 case convolve(fcNaN, fcNaN):
1329 case convolve(fcZero, fcNaN):
1330 case convolve(fcNormal, fcNaN):
1331 case convolve(fcInfinity, fcNaN):
1333 copySignificand(rhs);
1336 case convolve(fcNormal, fcInfinity):
1337 case convolve(fcInfinity, fcNormal):
1338 case convolve(fcInfinity, fcInfinity):
1339 category = fcInfinity;
1342 case convolve(fcZero, fcNormal):
1343 case convolve(fcNormal, fcZero):
1344 case convolve(fcZero, fcZero):
1348 case convolve(fcZero, fcInfinity):
1349 case convolve(fcInfinity, fcZero):
1353 case convolve(fcNormal, fcNormal):
1359 APFloat::divideSpecials(const APFloat &rhs)
1361 switch(convolve(category, rhs.category)) {
1365 case convolve(fcNaN, fcZero):
1366 case convolve(fcNaN, fcNormal):
1367 case convolve(fcNaN, fcInfinity):
1368 case convolve(fcNaN, fcNaN):
1369 case convolve(fcInfinity, fcZero):
1370 case convolve(fcInfinity, fcNormal):
1371 case convolve(fcZero, fcInfinity):
1372 case convolve(fcZero, fcNormal):
1375 case convolve(fcZero, fcNaN):
1376 case convolve(fcNormal, fcNaN):
1377 case convolve(fcInfinity, fcNaN):
1379 copySignificand(rhs);
1382 case convolve(fcNormal, fcInfinity):
1386 case convolve(fcNormal, fcZero):
1387 category = fcInfinity;
1390 case convolve(fcInfinity, fcInfinity):
1391 case convolve(fcZero, fcZero):
1395 case convolve(fcNormal, fcNormal):
1402 APFloat::changeSign()
1404 /* Look mummy, this one's easy. */
1409 APFloat::clearSign()
1411 /* So is this one. */
1416 APFloat::copySign(const APFloat &rhs)
1422 /* Normalized addition or subtraction. */
1424 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1429 assertArithmeticOK(*semantics);
1431 fs = addOrSubtractSpecials(rhs, subtract);
1433 /* This return code means it was not a simple case. */
1434 if(fs == opDivByZero) {
1435 lostFraction lost_fraction;
1437 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1438 fs = normalize(rounding_mode, lost_fraction);
1440 /* Can only be zero if we lost no fraction. */
1441 assert(category != fcZero || lost_fraction == lfExactlyZero);
1444 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1445 positive zero unless rounding to minus infinity, except that
1446 adding two like-signed zeroes gives that zero. */
1447 if(category == fcZero) {
1448 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1449 sign = (rounding_mode == rmTowardNegative);
1455 /* Normalized addition. */
1457 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1459 return addOrSubtract(rhs, rounding_mode, false);
1462 /* Normalized subtraction. */
1464 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1466 return addOrSubtract(rhs, rounding_mode, true);
1469 /* Normalized multiply. */
1471 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1475 assertArithmeticOK(*semantics);
1477 fs = multiplySpecials(rhs);
1479 if(category == fcNormal) {
1480 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1481 fs = normalize(rounding_mode, lost_fraction);
1482 if(lost_fraction != lfExactlyZero)
1483 fs = (opStatus) (fs | opInexact);
1489 /* Normalized divide. */
1491 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1495 assertArithmeticOK(*semantics);
1497 fs = divideSpecials(rhs);
1499 if(category == fcNormal) {
1500 lostFraction lost_fraction = divideSignificand(rhs);
1501 fs = normalize(rounding_mode, lost_fraction);
1502 if(lost_fraction != lfExactlyZero)
1503 fs = (opStatus) (fs | opInexact);
1509 /* Normalized remainder. This is not currently doing TRT. */
1511 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1515 unsigned int origSign = sign;
1517 assertArithmeticOK(*semantics);
1518 fs = V.divide(rhs, rmNearestTiesToEven);
1519 if (fs == opDivByZero)
1522 int parts = partCount();
1523 integerPart *x = new integerPart[parts];
1524 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1525 rmNearestTiesToEven);
1526 if (fs==opInvalidOp)
1529 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1530 rmNearestTiesToEven);
1531 assert(fs==opOK); // should always work
1533 fs = V.multiply(rhs, rounding_mode);
1534 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1536 fs = subtract(V, rounding_mode);
1537 assert(fs==opOK || fs==opInexact); // likewise
1540 sign = origSign; // IEEE754 requires this
1545 /* Normalized fused-multiply-add. */
1547 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1548 const APFloat &addend,
1549 roundingMode rounding_mode)
1553 assertArithmeticOK(*semantics);
1555 /* Post-multiplication sign, before addition. */
1556 sign ^= multiplicand.sign;
1558 /* If and only if all arguments are normal do we need to do an
1559 extended-precision calculation. */
1560 if(category == fcNormal
1561 && multiplicand.category == fcNormal
1562 && addend.category == fcNormal) {
1563 lostFraction lost_fraction;
1565 lost_fraction = multiplySignificand(multiplicand, &addend);
1566 fs = normalize(rounding_mode, lost_fraction);
1567 if(lost_fraction != lfExactlyZero)
1568 fs = (opStatus) (fs | opInexact);
1570 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1571 positive zero unless rounding to minus infinity, except that
1572 adding two like-signed zeroes gives that zero. */
1573 if(category == fcZero && sign != addend.sign)
1574 sign = (rounding_mode == rmTowardNegative);
1576 fs = multiplySpecials(multiplicand);
1578 /* FS can only be opOK or opInvalidOp. There is no more work
1579 to do in the latter case. The IEEE-754R standard says it is
1580 implementation-defined in this case whether, if ADDEND is a
1581 quiet NaN, we raise invalid op; this implementation does so.
1583 If we need to do the addition we can do so with normal
1586 fs = addOrSubtract(addend, rounding_mode, false);
1592 /* Comparison requires normalized numbers. */
1594 APFloat::compare(const APFloat &rhs) const
1598 assertArithmeticOK(*semantics);
1599 assert(semantics == rhs.semantics);
1601 switch(convolve(category, rhs.category)) {
1605 case convolve(fcNaN, fcZero):
1606 case convolve(fcNaN, fcNormal):
1607 case convolve(fcNaN, fcInfinity):
1608 case convolve(fcNaN, fcNaN):
1609 case convolve(fcZero, fcNaN):
1610 case convolve(fcNormal, fcNaN):
1611 case convolve(fcInfinity, fcNaN):
1612 return cmpUnordered;
1614 case convolve(fcInfinity, fcNormal):
1615 case convolve(fcInfinity, fcZero):
1616 case convolve(fcNormal, fcZero):
1620 return cmpGreaterThan;
1622 case convolve(fcNormal, fcInfinity):
1623 case convolve(fcZero, fcInfinity):
1624 case convolve(fcZero, fcNormal):
1626 return cmpGreaterThan;
1630 case convolve(fcInfinity, fcInfinity):
1631 if(sign == rhs.sign)
1636 return cmpGreaterThan;
1638 case convolve(fcZero, fcZero):
1641 case convolve(fcNormal, fcNormal):
1645 /* Two normal numbers. Do they have the same sign? */
1646 if(sign != rhs.sign) {
1648 result = cmpLessThan;
1650 result = cmpGreaterThan;
1652 /* Compare absolute values; invert result if negative. */
1653 result = compareAbsoluteValue(rhs);
1656 if(result == cmpLessThan)
1657 result = cmpGreaterThan;
1658 else if(result == cmpGreaterThan)
1659 result = cmpLessThan;
1667 APFloat::convert(const fltSemantics &toSemantics,
1668 roundingMode rounding_mode)
1670 lostFraction lostFraction;
1671 unsigned int newPartCount, oldPartCount;
1674 assertArithmeticOK(*semantics);
1675 lostFraction = lfExactlyZero;
1676 newPartCount = partCountForBits(toSemantics.precision + 1);
1677 oldPartCount = partCount();
1679 /* Handle storage complications. If our new form is wider,
1680 re-allocate our bit pattern into wider storage. If it is
1681 narrower, we ignore the excess parts, but if narrowing to a
1682 single part we need to free the old storage.
1683 Be careful not to reference significandParts for zeroes
1684 and infinities, since it aborts. */
1685 if (newPartCount > oldPartCount) {
1686 integerPart *newParts;
1687 newParts = new integerPart[newPartCount];
1688 APInt::tcSet(newParts, 0, newPartCount);
1689 if (category==fcNormal || category==fcNaN)
1690 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1692 significand.parts = newParts;
1693 } else if (newPartCount < oldPartCount) {
1694 /* Capture any lost fraction through truncation of parts so we get
1695 correct rounding whilst normalizing. */
1696 if (category==fcNormal)
1697 lostFraction = lostFractionThroughTruncation
1698 (significandParts(), oldPartCount, toSemantics.precision);
1699 if (newPartCount == 1) {
1700 integerPart newPart = 0;
1701 if (category==fcNormal || category==fcNaN)
1702 newPart = significandParts()[0];
1704 significand.part = newPart;
1708 if(category == fcNormal) {
1709 /* Re-interpret our bit-pattern. */
1710 exponent += toSemantics.precision - semantics->precision;
1711 semantics = &toSemantics;
1712 fs = normalize(rounding_mode, lostFraction);
1713 } else if (category == fcNaN) {
1714 int shift = toSemantics.precision - semantics->precision;
1715 // No normalization here, just truncate
1717 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1719 APInt::tcShiftRight(significandParts(), newPartCount, -shift);
1720 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1721 // does not give you back the same bits. This is dubious, and we
1722 // don't currently do it. You're really supposed to get
1723 // an invalid operation signal at runtime, but nobody does that.
1724 semantics = &toSemantics;
1727 semantics = &toSemantics;
1734 /* Convert a floating point number to an integer according to the
1735 rounding mode. If the rounded integer value is out of range this
1736 returns an invalid operation exception. If the rounded value is in
1737 range but the floating point number is not the exact integer, the C
1738 standard doesn't require an inexact exception to be raised. IEEE
1739 854 does require it so we do that.
1741 Note that for conversions to integer type the C standard requires
1742 round-to-zero to always be used. */
1744 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1746 roundingMode rounding_mode) const
1748 lostFraction lost_fraction;
1749 unsigned int msb, partsCount;
1752 assertArithmeticOK(*semantics);
1753 partsCount = partCountForBits(width);
1755 /* Handle the three special cases first. We produce
1756 a deterministic result even for the Invalid cases. */
1757 if (category == fcNaN) {
1758 // Neither sign nor isSigned affects this.
1759 APInt::tcSet(parts, 0, partsCount);
1762 if (category == fcInfinity) {
1763 if (!sign && isSigned)
1764 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1765 else if (!sign && !isSigned)
1766 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1767 else if (sign && isSigned) {
1768 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1769 APInt::tcShiftLeft(parts, partsCount, width-1);
1770 } else // sign && !isSigned
1771 APInt::tcSet(parts, 0, partsCount);
1774 if (category == fcZero) {
1775 APInt::tcSet(parts, 0, partsCount);
1779 /* Shift the bit pattern so the fraction is lost. */
1782 bits = (int) semantics->precision - 1 - exponent;
1785 lost_fraction = tmp.shiftSignificandRight(bits);
1787 if ((unsigned) -bits >= semantics->precision) {
1788 // Unrepresentably large.
1789 if (!sign && isSigned)
1790 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1791 else if (!sign && !isSigned)
1792 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1793 else if (sign && isSigned) {
1794 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1795 APInt::tcShiftLeft(parts, partsCount, width-1);
1796 } else // sign && !isSigned
1797 APInt::tcSet(parts, 0, partsCount);
1798 return (opStatus)(opOverflow | opInexact);
1800 tmp.shiftSignificandLeft(-bits);
1801 lost_fraction = lfExactlyZero;
1804 if(lost_fraction != lfExactlyZero
1805 && tmp.roundAwayFromZero(rounding_mode, lost_fraction, 0))
1806 tmp.incrementSignificand();
1808 msb = tmp.significandMSB();
1810 /* Negative numbers cannot be represented as unsigned. */
1811 if(!isSigned && tmp.sign && msb != -1U)
1814 /* It takes exponent + 1 bits to represent the truncated floating
1815 point number without its sign. We lose a bit for the sign, but
1816 the maximally negative integer is a special case. */
1817 if(msb + 1 > width) /* !! Not same as msb >= width !! */
1820 if(isSigned && msb + 1 == width
1821 && (!tmp.sign || tmp.significandLSB() != msb))
1824 APInt::tcAssign(parts, tmp.significandParts(), partsCount);
1827 APInt::tcNegate(parts, partsCount);
1829 if(lost_fraction == lfExactlyZero)
1835 /* Convert an unsigned integer SRC to a floating point number,
1836 rounding according to ROUNDING_MODE. The sign of the floating
1837 point number is not modified. */
1839 APFloat::convertFromUnsignedParts(const integerPart *src,
1840 unsigned int srcCount,
1841 roundingMode rounding_mode)
1843 unsigned int omsb, precision, dstCount;
1845 lostFraction lost_fraction;
1847 assertArithmeticOK(*semantics);
1848 category = fcNormal;
1849 omsb = APInt::tcMSB(src, srcCount) + 1;
1850 dst = significandParts();
1851 dstCount = partCount();
1852 precision = semantics->precision;
1854 /* We want the most significant PRECISON bits of SRC. There may not
1855 be that many; extract what we can. */
1856 if (precision <= omsb) {
1857 exponent = omsb - 1;
1858 lost_fraction = lostFractionThroughTruncation(src, srcCount,
1860 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
1862 exponent = precision - 1;
1863 lost_fraction = lfExactlyZero;
1864 APInt::tcExtract(dst, dstCount, src, omsb, 0);
1867 return normalize(rounding_mode, lost_fraction);
1870 /* Convert a two's complement integer SRC to a floating point number,
1871 rounding according to ROUNDING_MODE. ISSIGNED is true if the
1872 integer is signed, in which case it must be sign-extended. */
1874 APFloat::convertFromSignExtendedInteger(const integerPart *src,
1875 unsigned int srcCount,
1877 roundingMode rounding_mode)
1881 assertArithmeticOK(*semantics);
1883 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
1886 /* If we're signed and negative negate a copy. */
1888 copy = new integerPart[srcCount];
1889 APInt::tcAssign(copy, src, srcCount);
1890 APInt::tcNegate(copy, srcCount);
1891 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
1895 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
1901 /* FIXME: should this just take a const APInt reference? */
1903 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
1904 unsigned int width, bool isSigned,
1905 roundingMode rounding_mode)
1907 unsigned int partCount = partCountForBits(width);
1908 APInt api = APInt(width, partCount, parts);
1911 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1916 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
1920 APFloat::convertFromHexadecimalString(const char *p,
1921 roundingMode rounding_mode)
1923 lostFraction lost_fraction;
1924 integerPart *significand;
1925 unsigned int bitPos, partsCount;
1926 const char *dot, *firstSignificantDigit;
1930 category = fcNormal;
1932 significand = significandParts();
1933 partsCount = partCount();
1934 bitPos = partsCount * integerPartWidth;
1936 /* Skip leading zeroes and any (hexa)decimal point. */
1937 p = skipLeadingZeroesAndAnyDot(p, &dot);
1938 firstSignificantDigit = p;
1941 integerPart hex_value;
1948 hex_value = hexDigitValue(*p);
1949 if(hex_value == -1U) {
1950 lost_fraction = lfExactlyZero;
1956 /* Store the number whilst 4-bit nibbles remain. */
1959 hex_value <<= bitPos % integerPartWidth;
1960 significand[bitPos / integerPartWidth] |= hex_value;
1962 lost_fraction = trailingHexadecimalFraction(p, hex_value);
1963 while(hexDigitValue(*p) != -1U)
1969 /* Hex floats require an exponent but not a hexadecimal point. */
1970 assert(*p == 'p' || *p == 'P');
1972 /* Ignore the exponent if we are zero. */
1973 if(p != firstSignificantDigit) {
1976 /* Implicit hexadecimal point? */
1980 /* Calculate the exponent adjustment implicit in the number of
1981 significant digits. */
1982 expAdjustment = dot - firstSignificantDigit;
1983 if(expAdjustment < 0)
1985 expAdjustment = expAdjustment * 4 - 1;
1987 /* Adjust for writing the significand starting at the most
1988 significant nibble. */
1989 expAdjustment += semantics->precision;
1990 expAdjustment -= partsCount * integerPartWidth;
1992 /* Adjust for the given exponent. */
1993 exponent = totalExponent(p, expAdjustment);
1996 return normalize(rounding_mode, lost_fraction);
2000 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2001 unsigned sigPartCount, int exp,
2002 roundingMode rounding_mode)
2004 unsigned int parts, pow5PartCount;
2005 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2006 integerPart pow5Parts[maxPowerOfFiveParts];
2009 isNearest = (rounding_mode == rmNearestTiesToEven
2010 || rounding_mode == rmNearestTiesToAway);
2012 parts = partCountForBits(semantics->precision + 11);
2014 /* Calculate pow(5, abs(exp)). */
2015 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2017 for (;; parts *= 2) {
2018 opStatus sigStatus, powStatus;
2019 unsigned int excessPrecision, truncatedBits;
2021 calcSemantics.precision = parts * integerPartWidth - 1;
2022 excessPrecision = calcSemantics.precision - semantics->precision;
2023 truncatedBits = excessPrecision;
2025 APFloat decSig(calcSemantics, fcZero, sign);
2026 APFloat pow5(calcSemantics, fcZero, false);
2028 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2029 rmNearestTiesToEven);
2030 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2031 rmNearestTiesToEven);
2032 /* Add exp, as 10^n = 5^n * 2^n. */
2033 decSig.exponent += exp;
2035 lostFraction calcLostFraction;
2036 integerPart HUerr, HUdistance, powHUerr;
2039 /* multiplySignificand leaves the precision-th bit set to 1. */
2040 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2041 powHUerr = powStatus != opOK;
2043 calcLostFraction = decSig.divideSignificand(pow5);
2044 /* Denormal numbers have less precision. */
2045 if (decSig.exponent < semantics->minExponent) {
2046 excessPrecision += (semantics->minExponent - decSig.exponent);
2047 truncatedBits = excessPrecision;
2048 if (excessPrecision > calcSemantics.precision)
2049 excessPrecision = calcSemantics.precision;
2051 /* Extra half-ulp lost in reciprocal of exponent. */
2052 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0: 2;
2055 /* Both multiplySignificand and divideSignificand return the
2056 result with the integer bit set. */
2057 assert (APInt::tcExtractBit
2058 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2060 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2062 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2063 excessPrecision, isNearest);
2065 /* Are we guaranteed to round correctly if we truncate? */
2066 if (HUdistance >= HUerr) {
2067 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2068 calcSemantics.precision - excessPrecision,
2070 /* Take the exponent of decSig. If we tcExtract-ed less bits
2071 above we must adjust our exponent to compensate for the
2072 implicit right shift. */
2073 exponent = (decSig.exponent + semantics->precision
2074 - (calcSemantics.precision - excessPrecision));
2075 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2078 return normalize(rounding_mode, calcLostFraction);
2084 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
2089 /* Scan the text. */
2090 interpretDecimal(p, &D);
2092 /* Handle the quick cases. First the case of no significant digits,
2093 i.e. zero, and then exponents that are obviously too large or too
2094 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2095 definitely overflows if
2097 (exp - 1) * L >= maxExponent
2099 and definitely underflows to zero where
2101 (exp + 1) * L <= minExponent - precision
2103 With integer arithmetic the tightest bounds for L are
2105 93/28 < L < 196/59 [ numerator <= 256 ]
2106 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2109 if (*D.firstSigDigit == '0') {
2112 } else if ((D.normalizedExponent + 1) * 28738
2113 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2114 /* Underflow to zero and round. */
2116 fs = normalize(rounding_mode, lfLessThanHalf);
2117 } else if ((D.normalizedExponent - 1) * 42039
2118 >= 12655 * semantics->maxExponent) {
2119 /* Overflow and round. */
2120 fs = handleOverflow(rounding_mode);
2122 integerPart *decSignificand;
2123 unsigned int partCount;
2125 /* A tight upper bound on number of bits required to hold an
2126 N-digit decimal integer is N * 196 / 59. Allocate enough space
2127 to hold the full significand, and an extra part required by
2129 partCount = (D.lastSigDigit - D.firstSigDigit) + 1;
2130 partCount = partCountForBits(1 + 196 * partCount / 59);
2131 decSignificand = new integerPart[partCount + 1];
2134 /* Convert to binary efficiently - we do almost all multiplication
2135 in an integerPart. When this would overflow do we do a single
2136 bignum multiplication, and then revert again to multiplication
2137 in an integerPart. */
2139 integerPart decValue, val, multiplier;
2148 decValue = decDigitValue(*p++);
2150 val = val * 10 + decValue;
2151 /* The maximum number that can be multiplied by ten with any
2152 digit added without overflowing an integerPart. */
2153 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2155 /* Multiply out the current part. */
2156 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2157 partCount, partCount + 1, false);
2159 /* If we used another part (likely but not guaranteed), increase
2161 if (decSignificand[partCount])
2163 } while (p <= D.lastSigDigit);
2165 category = fcNormal;
2166 fs = roundSignificandWithExponent(decSignificand, partCount,
2167 D.exponent, rounding_mode);
2169 delete [] decSignificand;
2176 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2178 assertArithmeticOK(*semantics);
2180 /* Handle a leading minus sign. */
2186 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2187 return convertFromHexadecimalString(p + 2, rounding_mode);
2189 return convertFromDecimalString(p, rounding_mode);
2192 /* Write out a hexadecimal representation of the floating point value
2193 to DST, which must be of sufficient size, in the C99 form
2194 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2195 excluding the terminating NUL.
2197 If UPPERCASE, the output is in upper case, otherwise in lower case.
2199 HEXDIGITS digits appear altogether, rounding the value if
2200 necessary. If HEXDIGITS is 0, the minimal precision to display the
2201 number precisely is used instead. If nothing would appear after
2202 the decimal point it is suppressed.
2204 The decimal exponent is always printed and has at least one digit.
2205 Zero values display an exponent of zero. Infinities and NaNs
2206 appear as "infinity" or "nan" respectively.
2208 The above rules are as specified by C99. There is ambiguity about
2209 what the leading hexadecimal digit should be. This implementation
2210 uses whatever is necessary so that the exponent is displayed as
2211 stored. This implies the exponent will fall within the IEEE format
2212 range, and the leading hexadecimal digit will be 0 (for denormals),
2213 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2214 any other digits zero).
2217 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2218 bool upperCase, roundingMode rounding_mode) const
2222 assertArithmeticOK(*semantics);
2230 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2231 dst += sizeof infinityL - 1;
2235 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2236 dst += sizeof NaNU - 1;
2241 *dst++ = upperCase ? 'X': 'x';
2243 if (hexDigits > 1) {
2245 memset (dst, '0', hexDigits - 1);
2246 dst += hexDigits - 1;
2248 *dst++ = upperCase ? 'P': 'p';
2253 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2262 /* Does the hard work of outputting the correctly rounded hexadecimal
2263 form of a normal floating point number with the specified number of
2264 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2265 digits necessary to print the value precisely is output. */
2267 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2269 roundingMode rounding_mode) const
2271 unsigned int count, valueBits, shift, partsCount, outputDigits;
2272 const char *hexDigitChars;
2273 const integerPart *significand;
2278 *dst++ = upperCase ? 'X': 'x';
2281 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2283 significand = significandParts();
2284 partsCount = partCount();
2286 /* +3 because the first digit only uses the single integer bit, so
2287 we have 3 virtual zero most-significant-bits. */
2288 valueBits = semantics->precision + 3;
2289 shift = integerPartWidth - valueBits % integerPartWidth;
2291 /* The natural number of digits required ignoring trailing
2292 insignificant zeroes. */
2293 outputDigits = (valueBits - significandLSB () + 3) / 4;
2295 /* hexDigits of zero means use the required number for the
2296 precision. Otherwise, see if we are truncating. If we are,
2297 find out if we need to round away from zero. */
2299 if (hexDigits < outputDigits) {
2300 /* We are dropping non-zero bits, so need to check how to round.
2301 "bits" is the number of dropped bits. */
2303 lostFraction fraction;
2305 bits = valueBits - hexDigits * 4;
2306 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2307 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2309 outputDigits = hexDigits;
2312 /* Write the digits consecutively, and start writing in the location
2313 of the hexadecimal point. We move the most significant digit
2314 left and add the hexadecimal point later. */
2317 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2319 while (outputDigits && count) {
2322 /* Put the most significant integerPartWidth bits in "part". */
2323 if (--count == partsCount)
2324 part = 0; /* An imaginary higher zero part. */
2326 part = significand[count] << shift;
2329 part |= significand[count - 1] >> (integerPartWidth - shift);
2331 /* Convert as much of "part" to hexdigits as we can. */
2332 unsigned int curDigits = integerPartWidth / 4;
2334 if (curDigits > outputDigits)
2335 curDigits = outputDigits;
2336 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2337 outputDigits -= curDigits;
2343 /* Note that hexDigitChars has a trailing '0'. */
2346 *q = hexDigitChars[hexDigitValue (*q) + 1];
2347 } while (*q == '0');
2350 /* Add trailing zeroes. */
2351 memset (dst, '0', outputDigits);
2352 dst += outputDigits;
2355 /* Move the most significant digit to before the point, and if there
2356 is something after the decimal point add it. This must come
2357 after rounding above. */
2364 /* Finally output the exponent. */
2365 *dst++ = upperCase ? 'P': 'p';
2367 return writeSignedDecimal (dst, exponent);
2370 // For good performance it is desirable for different APFloats
2371 // to produce different integers.
2373 APFloat::getHashValue() const
2375 if (category==fcZero) return sign<<8 | semantics->precision ;
2376 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2377 else if (category==fcNaN) return 1<<10 | semantics->precision;
2379 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2380 const integerPart* p = significandParts();
2381 for (int i=partCount(); i>0; i--, p++)
2382 hash ^= ((uint32_t)*p) ^ (*p)>>32;
2387 // Conversion from APFloat to/from host float/double. It may eventually be
2388 // possible to eliminate these and have everybody deal with APFloats, but that
2389 // will take a while. This approach will not easily extend to long double.
2390 // Current implementation requires integerPartWidth==64, which is correct at
2391 // the moment but could be made more general.
2393 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2394 // the actual IEEE respresentations. We compensate for that here.
2397 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2399 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended);
2400 assert (partCount()==2);
2402 uint64_t myexponent, mysignificand;
2404 if (category==fcNormal) {
2405 myexponent = exponent+16383; //bias
2406 mysignificand = significandParts()[0];
2407 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2408 myexponent = 0; // denormal
2409 } else if (category==fcZero) {
2412 } else if (category==fcInfinity) {
2413 myexponent = 0x7fff;
2414 mysignificand = 0x8000000000000000ULL;
2416 assert(category == fcNaN && "Unknown category");
2417 myexponent = 0x7fff;
2418 mysignificand = significandParts()[0];
2422 words[0] = (((uint64_t)sign & 1) << 63) |
2423 ((myexponent & 0x7fff) << 48) |
2424 ((mysignificand >>16) & 0xffffffffffffLL);
2425 words[1] = mysignificand & 0xffff;
2426 return APInt(80, 2, words);
2430 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2432 assert(semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble);
2433 assert (partCount()==2);
2435 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2437 if (category==fcNormal) {
2438 myexponent = exponent + 1023; //bias
2439 myexponent2 = exponent2 + 1023;
2440 mysignificand = significandParts()[0];
2441 mysignificand2 = significandParts()[1];
2442 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2443 myexponent = 0; // denormal
2444 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2445 myexponent2 = 0; // denormal
2446 } else if (category==fcZero) {
2451 } else if (category==fcInfinity) {
2457 assert(category == fcNaN && "Unknown category");
2459 mysignificand = significandParts()[0];
2460 myexponent2 = exponent2;
2461 mysignificand2 = significandParts()[1];
2465 words[0] = (((uint64_t)sign & 1) << 63) |
2466 ((myexponent & 0x7ff) << 52) |
2467 (mysignificand & 0xfffffffffffffLL);
2468 words[1] = (((uint64_t)sign2 & 1) << 63) |
2469 ((myexponent2 & 0x7ff) << 52) |
2470 (mysignificand2 & 0xfffffffffffffLL);
2471 return APInt(128, 2, words);
2475 APFloat::convertDoubleAPFloatToAPInt() const
2477 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2478 assert (partCount()==1);
2480 uint64_t myexponent, mysignificand;
2482 if (category==fcNormal) {
2483 myexponent = exponent+1023; //bias
2484 mysignificand = *significandParts();
2485 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2486 myexponent = 0; // denormal
2487 } else if (category==fcZero) {
2490 } else if (category==fcInfinity) {
2494 assert(category == fcNaN && "Unknown category!");
2496 mysignificand = *significandParts();
2499 return APInt(64, (((((uint64_t)sign & 1) << 63) |
2500 ((myexponent & 0x7ff) << 52) |
2501 (mysignificand & 0xfffffffffffffLL))));
2505 APFloat::convertFloatAPFloatToAPInt() const
2507 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2508 assert (partCount()==1);
2510 uint32_t myexponent, mysignificand;
2512 if (category==fcNormal) {
2513 myexponent = exponent+127; //bias
2514 mysignificand = *significandParts();
2515 if (myexponent == 1 && !(mysignificand & 0x400000))
2516 myexponent = 0; // denormal
2517 } else if (category==fcZero) {
2520 } else if (category==fcInfinity) {
2524 assert(category == fcNaN && "Unknown category!");
2526 mysignificand = *significandParts();
2529 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2530 (mysignificand & 0x7fffff)));
2533 // This function creates an APInt that is just a bit map of the floating
2534 // point constant as it would appear in memory. It is not a conversion,
2535 // and treating the result as a normal integer is unlikely to be useful.
2538 APFloat::convertToAPInt() const
2540 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle)
2541 return convertFloatAPFloatToAPInt();
2543 if (semantics == (const llvm::fltSemantics* const)&IEEEdouble)
2544 return convertDoubleAPFloatToAPInt();
2546 if (semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble)
2547 return convertPPCDoubleDoubleAPFloatToAPInt();
2549 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended &&
2551 return convertF80LongDoubleAPFloatToAPInt();
2555 APFloat::convertToFloat() const
2557 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle);
2558 APInt api = convertToAPInt();
2559 return api.bitsToFloat();
2563 APFloat::convertToDouble() const
2565 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble);
2566 APInt api = convertToAPInt();
2567 return api.bitsToDouble();
2570 /// Integer bit is explicit in this format. Current Intel book does not
2571 /// define meaning of:
2572 /// exponent = all 1's, integer bit not set.
2573 /// exponent = 0, integer bit set. (formerly "psuedodenormals")
2574 /// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals")
2576 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2578 assert(api.getBitWidth()==80);
2579 uint64_t i1 = api.getRawData()[0];
2580 uint64_t i2 = api.getRawData()[1];
2581 uint64_t myexponent = (i1 >> 48) & 0x7fff;
2582 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
2585 initialize(&APFloat::x87DoubleExtended);
2586 assert(partCount()==2);
2589 if (myexponent==0 && mysignificand==0) {
2590 // exponent, significand meaningless
2592 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2593 // exponent, significand meaningless
2594 category = fcInfinity;
2595 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2596 // exponent meaningless
2598 significandParts()[0] = mysignificand;
2599 significandParts()[1] = 0;
2601 category = fcNormal;
2602 exponent = myexponent - 16383;
2603 significandParts()[0] = mysignificand;
2604 significandParts()[1] = 0;
2605 if (myexponent==0) // denormal
2611 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2613 assert(api.getBitWidth()==128);
2614 uint64_t i1 = api.getRawData()[0];
2615 uint64_t i2 = api.getRawData()[1];
2616 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2617 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2618 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2619 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2621 initialize(&APFloat::PPCDoubleDouble);
2622 assert(partCount()==2);
2626 if (myexponent==0 && mysignificand==0) {
2627 // exponent, significand meaningless
2628 // exponent2 and significand2 are required to be 0; we don't check
2630 } else if (myexponent==0x7ff && mysignificand==0) {
2631 // exponent, significand meaningless
2632 // exponent2 and significand2 are required to be 0; we don't check
2633 category = fcInfinity;
2634 } else if (myexponent==0x7ff && mysignificand!=0) {
2635 // exponent meaningless. So is the whole second word, but keep it
2638 exponent2 = myexponent2;
2639 significandParts()[0] = mysignificand;
2640 significandParts()[1] = mysignificand2;
2642 category = fcNormal;
2643 // Note there is no category2; the second word is treated as if it is
2644 // fcNormal, although it might be something else considered by itself.
2645 exponent = myexponent - 1023;
2646 exponent2 = myexponent2 - 1023;
2647 significandParts()[0] = mysignificand;
2648 significandParts()[1] = mysignificand2;
2649 if (myexponent==0) // denormal
2652 significandParts()[0] |= 0x10000000000000LL; // integer bit
2656 significandParts()[1] |= 0x10000000000000LL; // integer bit
2661 APFloat::initFromDoubleAPInt(const APInt &api)
2663 assert(api.getBitWidth()==64);
2664 uint64_t i = *api.getRawData();
2665 uint64_t myexponent = (i >> 52) & 0x7ff;
2666 uint64_t mysignificand = i & 0xfffffffffffffLL;
2668 initialize(&APFloat::IEEEdouble);
2669 assert(partCount()==1);
2672 if (myexponent==0 && mysignificand==0) {
2673 // exponent, significand meaningless
2675 } else if (myexponent==0x7ff && mysignificand==0) {
2676 // exponent, significand meaningless
2677 category = fcInfinity;
2678 } else if (myexponent==0x7ff && mysignificand!=0) {
2679 // exponent meaningless
2681 *significandParts() = mysignificand;
2683 category = fcNormal;
2684 exponent = myexponent - 1023;
2685 *significandParts() = mysignificand;
2686 if (myexponent==0) // denormal
2689 *significandParts() |= 0x10000000000000LL; // integer bit
2694 APFloat::initFromFloatAPInt(const APInt & api)
2696 assert(api.getBitWidth()==32);
2697 uint32_t i = (uint32_t)*api.getRawData();
2698 uint32_t myexponent = (i >> 23) & 0xff;
2699 uint32_t mysignificand = i & 0x7fffff;
2701 initialize(&APFloat::IEEEsingle);
2702 assert(partCount()==1);
2705 if (myexponent==0 && mysignificand==0) {
2706 // exponent, significand meaningless
2708 } else if (myexponent==0xff && mysignificand==0) {
2709 // exponent, significand meaningless
2710 category = fcInfinity;
2711 } else if (myexponent==0xff && mysignificand!=0) {
2712 // sign, exponent, significand meaningless
2714 *significandParts() = mysignificand;
2716 category = fcNormal;
2717 exponent = myexponent - 127; //bias
2718 *significandParts() = mysignificand;
2719 if (myexponent==0) // denormal
2722 *significandParts() |= 0x800000; // integer bit
2726 /// Treat api as containing the bits of a floating point number. Currently
2727 /// we infer the floating point type from the size of the APInt. The
2728 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2729 /// when the size is anything else).
2731 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2733 if (api.getBitWidth() == 32)
2734 return initFromFloatAPInt(api);
2735 else if (api.getBitWidth()==64)
2736 return initFromDoubleAPInt(api);
2737 else if (api.getBitWidth()==80)
2738 return initFromF80LongDoubleAPInt(api);
2739 else if (api.getBitWidth()==128 && !isIEEE)
2740 return initFromPPCDoubleDoubleAPInt(api);
2745 APFloat::APFloat(const APInt& api, bool isIEEE)
2747 initFromAPInt(api, isIEEE);
2750 APFloat::APFloat(float f)
2752 APInt api = APInt(32, 0);
2753 initFromAPInt(api.floatToBits(f));
2756 APFloat::APFloat(double d)
2758 APInt api = APInt(64, 0);
2759 initFromAPInt(api.doubleToBits(d));