1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Neil Booth and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
18 #include "llvm/Support/MathExtras.h"
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
30 /* Represents floating point arithmetic semantics. */
32 /* The largest E such that 2^E is representable; this matches the
33 definition of IEEE 754. */
34 exponent_t maxExponent;
36 /* The smallest E such that 2^E is a normalized number; this
37 matches the definition of IEEE 754. */
38 exponent_t minExponent;
40 /* Number of bits in the significand. This includes the integer
42 unsigned int precision;
44 /* True if arithmetic is supported. */
45 unsigned int arithmeticOK;
48 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
49 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
50 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
51 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
52 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
54 // The PowerPC format consists of two doubles. It does not map cleanly
55 // onto the usual format above. For now only storage of constants of
56 // this type is supported, no arithmetic.
57 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
59 /* A tight upper bound on number of parts required to hold the value
62 power * 815 / (351 * integerPartWidth) + 1
64 However, whilst the result may require only this many parts,
65 because we are multiplying two values to get it, the
66 multiplication may require an extra part with the excess part
67 being zero (consider the trivial case of 1 * 1, tcFullMultiply
68 requires two parts to hold the single-part result). So we add an
69 extra one to guarantee enough space whilst multiplying. */
70 const unsigned int maxExponent = 16383;
71 const unsigned int maxPrecision = 113;
72 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
73 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
74 / (351 * integerPartWidth));
77 /* Put a bunch of private, handy routines in an anonymous namespace. */
81 partCountForBits(unsigned int bits)
83 return ((bits) + integerPartWidth - 1) / integerPartWidth;
86 /* Returns 0U-9U. Return values >= 10U are not digits. */
88 decDigitValue(unsigned int c)
94 hexDigitValue(unsigned int c)
114 assertArithmeticOK(const llvm::fltSemantics &semantics) {
115 assert(semantics.arithmeticOK
116 && "Compile-time arithmetic does not support these semantics");
119 /* Return the value of a decimal exponent of the form
122 If the exponent overflows, returns a large exponent with the
125 readExponent(const char *p)
128 unsigned int absExponent;
129 const unsigned int overlargeExponent = 24000; /* FIXME. */
131 isNegative = (*p == '-');
132 if (*p == '-' || *p == '+')
135 absExponent = decDigitValue(*p++);
136 assert (absExponent < 10U);
141 value = decDigitValue(*p);
146 value += absExponent * 10;
147 if (absExponent >= overlargeExponent) {
148 absExponent = overlargeExponent;
155 return -(int) absExponent;
157 return (int) absExponent;
160 /* This is ugly and needs cleaning up, but I don't immediately see
161 how whilst remaining safe. */
163 totalExponent(const char *p, int exponentAdjustment)
165 integerPart unsignedExponent;
166 bool negative, overflow;
169 /* Move past the exponent letter and sign to the digits. */
171 negative = *p == '-';
172 if(*p == '-' || *p == '+')
175 unsignedExponent = 0;
180 value = decDigitValue(*p);
185 unsignedExponent = unsignedExponent * 10 + value;
186 if(unsignedExponent > 65535)
190 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
194 exponent = unsignedExponent;
196 exponent = -exponent;
197 exponent += exponentAdjustment;
198 if(exponent > 65535 || exponent < -65536)
203 exponent = negative ? -65536: 65535;
209 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
224 /* Given a normal decimal floating point number of the form
228 where the decimal point and exponent are optional, fill out the
229 structure D. Exponent is appropriate if the significand is
230 treated as an integer, and normalizedExponent if the significand
231 is taken to have the decimal point after a single leading
234 If the value is zero, V->firstSigDigit points to a non-digit, and
235 the return exponent is zero.
238 const char *firstSigDigit;
239 const char *lastSigDigit;
241 int normalizedExponent;
245 interpretDecimal(const char *p, decimalInfo *D)
249 p = skipLeadingZeroesAndAnyDot (p, &dot);
251 D->firstSigDigit = p;
253 D->normalizedExponent = 0;
260 if (decDigitValue(*p) >= 10U)
265 /* If number is all zerooes accept any exponent. */
266 if (p != D->firstSigDigit) {
267 if (*p == 'e' || *p == 'E')
268 D->exponent = readExponent(p + 1);
270 /* Implied decimal point? */
274 /* Drop insignificant trailing zeroes. */
281 /* Adjust the exponents for any decimal point. */
282 D->exponent += (dot - p) - (dot > p);
283 D->normalizedExponent = (D->exponent + (p - D->firstSigDigit)
284 - (dot > D->firstSigDigit && dot < p));
290 /* Return the trailing fraction of a hexadecimal number.
291 DIGITVALUE is the first hex digit of the fraction, P points to
294 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
296 unsigned int hexDigit;
298 /* If the first trailing digit isn't 0 or 8 we can work out the
299 fraction immediately. */
301 return lfMoreThanHalf;
302 else if(digitValue < 8 && digitValue > 0)
303 return lfLessThanHalf;
305 /* Otherwise we need to find the first non-zero digit. */
309 hexDigit = hexDigitValue(*p);
311 /* If we ran off the end it is exactly zero or one-half, otherwise
314 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
316 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
319 /* Return the fraction lost were a bignum truncated losing the least
320 significant BITS bits. */
322 lostFractionThroughTruncation(const integerPart *parts,
323 unsigned int partCount,
328 lsb = APInt::tcLSB(parts, partCount);
330 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
332 return lfExactlyZero;
334 return lfExactlyHalf;
335 if(bits <= partCount * integerPartWidth
336 && APInt::tcExtractBit(parts, bits - 1))
337 return lfMoreThanHalf;
339 return lfLessThanHalf;
342 /* Shift DST right BITS bits noting lost fraction. */
344 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
346 lostFraction lost_fraction;
348 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
350 APInt::tcShiftRight(dst, parts, bits);
352 return lost_fraction;
355 /* Combine the effect of two lost fractions. */
357 combineLostFractions(lostFraction moreSignificant,
358 lostFraction lessSignificant)
360 if(lessSignificant != lfExactlyZero) {
361 if(moreSignificant == lfExactlyZero)
362 moreSignificant = lfLessThanHalf;
363 else if(moreSignificant == lfExactlyHalf)
364 moreSignificant = lfMoreThanHalf;
367 return moreSignificant;
370 /* The error from the true value, in half-ulps, on multiplying two
371 floating point numbers, which differ from the value they
372 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
373 than the returned value.
375 See "How to Read Floating Point Numbers Accurately" by William D
378 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
380 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
382 if (HUerr1 + HUerr2 == 0)
383 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
385 return inexactMultiply + 2 * (HUerr1 + HUerr2);
388 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
389 when the least significant BITS are truncated. BITS cannot be
392 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
394 unsigned int count, partBits;
395 integerPart part, boundary;
400 count = bits / integerPartWidth;
401 partBits = bits % integerPartWidth + 1;
403 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
406 boundary = (integerPart) 1 << (partBits - 1);
411 if (part - boundary <= boundary - part)
412 return part - boundary;
414 return boundary - part;
417 if (part == boundary) {
420 return ~(integerPart) 0; /* A lot. */
423 } else if (part == boundary - 1) {
426 return ~(integerPart) 0; /* A lot. */
431 return ~(integerPart) 0; /* A lot. */
434 /* Place pow(5, power) in DST, and return the number of parts used.
435 DST must be at least one part larger than size of the answer. */
437 powerOf5(integerPart *dst, unsigned int power)
439 static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
441 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
442 static unsigned int partsCount[16] = { 1 };
444 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
447 assert(power <= maxExponent);
452 *p1 = firstEightPowers[power & 7];
458 for (unsigned int n = 0; power; power >>= 1, n++) {
463 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
465 pc = partsCount[n - 1];
466 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
468 if (pow5[pc - 1] == 0)
476 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
478 if (p2[result - 1] == 0)
481 /* Now result is in p1 with partsCount parts and p2 is scratch
483 tmp = p1, p1 = p2, p2 = tmp;
490 APInt::tcAssign(dst, p1, result);
495 /* Zero at the end to avoid modular arithmetic when adding one; used
496 when rounding up during hexadecimal output. */
497 static const char hexDigitsLower[] = "0123456789abcdef0";
498 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
499 static const char infinityL[] = "infinity";
500 static const char infinityU[] = "INFINITY";
501 static const char NaNL[] = "nan";
502 static const char NaNU[] = "NAN";
504 /* Write out an integerPart in hexadecimal, starting with the most
505 significant nibble. Write out exactly COUNT hexdigits, return
508 partAsHex (char *dst, integerPart part, unsigned int count,
509 const char *hexDigitChars)
511 unsigned int result = count;
513 assert (count != 0 && count <= integerPartWidth / 4);
515 part >>= (integerPartWidth - 4 * count);
517 dst[count] = hexDigitChars[part & 0xf];
524 /* Write out an unsigned decimal integer. */
526 writeUnsignedDecimal (char *dst, unsigned int n)
542 /* Write out a signed decimal integer. */
544 writeSignedDecimal (char *dst, int value)
548 dst = writeUnsignedDecimal(dst, -(unsigned) value);
550 dst = writeUnsignedDecimal(dst, value);
558 APFloat::initialize(const fltSemantics *ourSemantics)
562 semantics = ourSemantics;
565 significand.parts = new integerPart[count];
569 APFloat::freeSignificand()
572 delete [] significand.parts;
576 APFloat::assign(const APFloat &rhs)
578 assert(semantics == rhs.semantics);
581 category = rhs.category;
582 exponent = rhs.exponent;
584 exponent2 = rhs.exponent2;
585 if(category == fcNormal || category == fcNaN)
586 copySignificand(rhs);
590 APFloat::copySignificand(const APFloat &rhs)
592 assert(category == fcNormal || category == fcNaN);
593 assert(rhs.partCount() >= partCount());
595 APInt::tcAssign(significandParts(), rhs.significandParts(),
599 /* Make this number a NaN, with an arbitrary but deterministic value
600 for the significand. */
602 APFloat::makeNaN(void)
605 APInt::tcSet(significandParts(), ~0U, partCount());
609 APFloat::operator=(const APFloat &rhs)
612 if(semantics != rhs.semantics) {
614 initialize(rhs.semantics);
623 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
626 if (semantics != rhs.semantics ||
627 category != rhs.category ||
630 if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble &&
633 if (category==fcZero || category==fcInfinity)
635 else if (category==fcNormal && exponent!=rhs.exponent)
637 else if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble &&
638 exponent2!=rhs.exponent2)
642 const integerPart* p=significandParts();
643 const integerPart* q=rhs.significandParts();
644 for (; i>0; i--, p++, q++) {
652 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
654 assertArithmeticOK(ourSemantics);
655 initialize(&ourSemantics);
658 exponent = ourSemantics.precision - 1;
659 significandParts()[0] = value;
660 normalize(rmNearestTiesToEven, lfExactlyZero);
663 APFloat::APFloat(const fltSemantics &ourSemantics,
664 fltCategory ourCategory, bool negative)
666 assertArithmeticOK(ourSemantics);
667 initialize(&ourSemantics);
668 category = ourCategory;
670 if(category == fcNormal)
672 else if (ourCategory == fcNaN)
676 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
678 assertArithmeticOK(ourSemantics);
679 initialize(&ourSemantics);
680 convertFromString(text, rmNearestTiesToEven);
683 APFloat::APFloat(const APFloat &rhs)
685 initialize(rhs.semantics);
695 APFloat::partCount() const
697 return partCountForBits(semantics->precision + 1);
701 APFloat::semanticsPrecision(const fltSemantics &semantics)
703 return semantics.precision;
707 APFloat::significandParts() const
709 return const_cast<APFloat *>(this)->significandParts();
713 APFloat::significandParts()
715 assert(category == fcNormal || category == fcNaN);
718 return significand.parts;
720 return &significand.part;
724 APFloat::zeroSignificand()
727 APInt::tcSet(significandParts(), 0, partCount());
730 /* Increment an fcNormal floating point number's significand. */
732 APFloat::incrementSignificand()
736 carry = APInt::tcIncrement(significandParts(), partCount());
738 /* Our callers should never cause us to overflow. */
742 /* Add the significand of the RHS. Returns the carry flag. */
744 APFloat::addSignificand(const APFloat &rhs)
748 parts = significandParts();
750 assert(semantics == rhs.semantics);
751 assert(exponent == rhs.exponent);
753 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
756 /* Subtract the significand of the RHS with a borrow flag. Returns
759 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
763 parts = significandParts();
765 assert(semantics == rhs.semantics);
766 assert(exponent == rhs.exponent);
768 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
772 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
773 on to the full-precision result of the multiplication. Returns the
776 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
778 unsigned int omsb; // One, not zero, based MSB.
779 unsigned int partsCount, newPartsCount, precision;
780 integerPart *lhsSignificand;
781 integerPart scratch[4];
782 integerPart *fullSignificand;
783 lostFraction lost_fraction;
785 assert(semantics == rhs.semantics);
787 precision = semantics->precision;
788 newPartsCount = partCountForBits(precision * 2);
790 if(newPartsCount > 4)
791 fullSignificand = new integerPart[newPartsCount];
793 fullSignificand = scratch;
795 lhsSignificand = significandParts();
796 partsCount = partCount();
798 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
799 rhs.significandParts(), partsCount, partsCount);
801 lost_fraction = lfExactlyZero;
802 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
803 exponent += rhs.exponent;
806 Significand savedSignificand = significand;
807 const fltSemantics *savedSemantics = semantics;
808 fltSemantics extendedSemantics;
810 unsigned int extendedPrecision;
812 /* Normalize our MSB. */
813 extendedPrecision = precision + precision - 1;
814 if(omsb != extendedPrecision)
816 APInt::tcShiftLeft(fullSignificand, newPartsCount,
817 extendedPrecision - omsb);
818 exponent -= extendedPrecision - omsb;
821 /* Create new semantics. */
822 extendedSemantics = *semantics;
823 extendedSemantics.precision = extendedPrecision;
825 if(newPartsCount == 1)
826 significand.part = fullSignificand[0];
828 significand.parts = fullSignificand;
829 semantics = &extendedSemantics;
831 APFloat extendedAddend(*addend);
832 status = extendedAddend.convert(extendedSemantics, rmTowardZero);
833 assert(status == opOK);
834 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
836 /* Restore our state. */
837 if(newPartsCount == 1)
838 fullSignificand[0] = significand.part;
839 significand = savedSignificand;
840 semantics = savedSemantics;
842 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
845 exponent -= (precision - 1);
847 if(omsb > precision) {
848 unsigned int bits, significantParts;
851 bits = omsb - precision;
852 significantParts = partCountForBits(omsb);
853 lf = shiftRight(fullSignificand, significantParts, bits);
854 lost_fraction = combineLostFractions(lf, lost_fraction);
858 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
860 if(newPartsCount > 4)
861 delete [] fullSignificand;
863 return lost_fraction;
866 /* Multiply the significands of LHS and RHS to DST. */
868 APFloat::divideSignificand(const APFloat &rhs)
870 unsigned int bit, i, partsCount;
871 const integerPart *rhsSignificand;
872 integerPart *lhsSignificand, *dividend, *divisor;
873 integerPart scratch[4];
874 lostFraction lost_fraction;
876 assert(semantics == rhs.semantics);
878 lhsSignificand = significandParts();
879 rhsSignificand = rhs.significandParts();
880 partsCount = partCount();
883 dividend = new integerPart[partsCount * 2];
887 divisor = dividend + partsCount;
889 /* Copy the dividend and divisor as they will be modified in-place. */
890 for(i = 0; i < partsCount; i++) {
891 dividend[i] = lhsSignificand[i];
892 divisor[i] = rhsSignificand[i];
893 lhsSignificand[i] = 0;
896 exponent -= rhs.exponent;
898 unsigned int precision = semantics->precision;
900 /* Normalize the divisor. */
901 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
904 APInt::tcShiftLeft(divisor, partsCount, bit);
907 /* Normalize the dividend. */
908 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
911 APInt::tcShiftLeft(dividend, partsCount, bit);
914 /* Ensure the dividend >= divisor initially for the loop below.
915 Incidentally, this means that the division loop below is
916 guaranteed to set the integer bit to one. */
917 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
919 APInt::tcShiftLeft(dividend, partsCount, 1);
920 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
924 for(bit = precision; bit; bit -= 1) {
925 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
926 APInt::tcSubtract(dividend, divisor, 0, partsCount);
927 APInt::tcSetBit(lhsSignificand, bit - 1);
930 APInt::tcShiftLeft(dividend, partsCount, 1);
933 /* Figure out the lost fraction. */
934 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
937 lost_fraction = lfMoreThanHalf;
939 lost_fraction = lfExactlyHalf;
940 else if(APInt::tcIsZero(dividend, partsCount))
941 lost_fraction = lfExactlyZero;
943 lost_fraction = lfLessThanHalf;
948 return lost_fraction;
952 APFloat::significandMSB() const
954 return APInt::tcMSB(significandParts(), partCount());
958 APFloat::significandLSB() const
960 return APInt::tcLSB(significandParts(), partCount());
963 /* Note that a zero result is NOT normalized to fcZero. */
965 APFloat::shiftSignificandRight(unsigned int bits)
967 /* Our exponent should not overflow. */
968 assert((exponent_t) (exponent + bits) >= exponent);
972 return shiftRight(significandParts(), partCount(), bits);
975 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
977 APFloat::shiftSignificandLeft(unsigned int bits)
979 assert(bits < semantics->precision);
982 unsigned int partsCount = partCount();
984 APInt::tcShiftLeft(significandParts(), partsCount, bits);
987 assert(!APInt::tcIsZero(significandParts(), partsCount));
992 APFloat::compareAbsoluteValue(const APFloat &rhs) const
996 assert(semantics == rhs.semantics);
997 assert(category == fcNormal);
998 assert(rhs.category == fcNormal);
1000 compare = exponent - rhs.exponent;
1002 /* If exponents are equal, do an unsigned bignum comparison of the
1005 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1009 return cmpGreaterThan;
1010 else if(compare < 0)
1016 /* Handle overflow. Sign is preserved. We either become infinity or
1017 the largest finite number. */
1019 APFloat::handleOverflow(roundingMode rounding_mode)
1022 if(rounding_mode == rmNearestTiesToEven
1023 || rounding_mode == rmNearestTiesToAway
1024 || (rounding_mode == rmTowardPositive && !sign)
1025 || (rounding_mode == rmTowardNegative && sign))
1027 category = fcInfinity;
1028 return (opStatus) (opOverflow | opInexact);
1031 /* Otherwise we become the largest finite number. */
1032 category = fcNormal;
1033 exponent = semantics->maxExponent;
1034 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1035 semantics->precision);
1040 /* Returns TRUE if, when truncating the current number, with BIT the
1041 new LSB, with the given lost fraction and rounding mode, the result
1042 would need to be rounded away from zero (i.e., by increasing the
1043 signficand). This routine must work for fcZero of both signs, and
1044 fcNormal numbers. */
1046 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1047 lostFraction lost_fraction,
1048 unsigned int bit) const
1050 /* NaNs and infinities should not have lost fractions. */
1051 assert(category == fcNormal || category == fcZero);
1053 /* Current callers never pass this so we don't handle it. */
1054 assert(lost_fraction != lfExactlyZero);
1056 switch(rounding_mode) {
1060 case rmNearestTiesToAway:
1061 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1063 case rmNearestTiesToEven:
1064 if(lost_fraction == lfMoreThanHalf)
1067 /* Our zeroes don't have a significand to test. */
1068 if(lost_fraction == lfExactlyHalf && category != fcZero)
1069 return APInt::tcExtractBit(significandParts(), bit);
1076 case rmTowardPositive:
1077 return sign == false;
1079 case rmTowardNegative:
1080 return sign == true;
1085 APFloat::normalize(roundingMode rounding_mode,
1086 lostFraction lost_fraction)
1088 unsigned int omsb; /* One, not zero, based MSB. */
1091 if(category != fcNormal)
1094 /* Before rounding normalize the exponent of fcNormal numbers. */
1095 omsb = significandMSB() + 1;
1098 /* OMSB is numbered from 1. We want to place it in the integer
1099 bit numbered PRECISON if possible, with a compensating change in
1101 exponentChange = omsb - semantics->precision;
1103 /* If the resulting exponent is too high, overflow according to
1104 the rounding mode. */
1105 if(exponent + exponentChange > semantics->maxExponent)
1106 return handleOverflow(rounding_mode);
1108 /* Subnormal numbers have exponent minExponent, and their MSB
1109 is forced based on that. */
1110 if(exponent + exponentChange < semantics->minExponent)
1111 exponentChange = semantics->minExponent - exponent;
1113 /* Shifting left is easy as we don't lose precision. */
1114 if(exponentChange < 0) {
1115 assert(lost_fraction == lfExactlyZero);
1117 shiftSignificandLeft(-exponentChange);
1122 if(exponentChange > 0) {
1125 /* Shift right and capture any new lost fraction. */
1126 lf = shiftSignificandRight(exponentChange);
1128 lost_fraction = combineLostFractions(lf, lost_fraction);
1130 /* Keep OMSB up-to-date. */
1131 if(omsb > (unsigned) exponentChange)
1132 omsb -= exponentChange;
1138 /* Now round the number according to rounding_mode given the lost
1141 /* As specified in IEEE 754, since we do not trap we do not report
1142 underflow for exact results. */
1143 if(lost_fraction == lfExactlyZero) {
1144 /* Canonicalize zeroes. */
1151 /* Increment the significand if we're rounding away from zero. */
1152 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1154 exponent = semantics->minExponent;
1156 incrementSignificand();
1157 omsb = significandMSB() + 1;
1159 /* Did the significand increment overflow? */
1160 if(omsb == (unsigned) semantics->precision + 1) {
1161 /* Renormalize by incrementing the exponent and shifting our
1162 significand right one. However if we already have the
1163 maximum exponent we overflow to infinity. */
1164 if(exponent == semantics->maxExponent) {
1165 category = fcInfinity;
1167 return (opStatus) (opOverflow | opInexact);
1170 shiftSignificandRight(1);
1176 /* The normal case - we were and are not denormal, and any
1177 significand increment above didn't overflow. */
1178 if(omsb == semantics->precision)
1181 /* We have a non-zero denormal. */
1182 assert(omsb < semantics->precision);
1184 /* Canonicalize zeroes. */
1188 /* The fcZero case is a denormal that underflowed to zero. */
1189 return (opStatus) (opUnderflow | opInexact);
1193 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1195 switch(convolve(category, rhs.category)) {
1199 case convolve(fcNaN, fcZero):
1200 case convolve(fcNaN, fcNormal):
1201 case convolve(fcNaN, fcInfinity):
1202 case convolve(fcNaN, fcNaN):
1203 case convolve(fcNormal, fcZero):
1204 case convolve(fcInfinity, fcNormal):
1205 case convolve(fcInfinity, fcZero):
1208 case convolve(fcZero, fcNaN):
1209 case convolve(fcNormal, fcNaN):
1210 case convolve(fcInfinity, fcNaN):
1212 copySignificand(rhs);
1215 case convolve(fcNormal, fcInfinity):
1216 case convolve(fcZero, fcInfinity):
1217 category = fcInfinity;
1218 sign = rhs.sign ^ subtract;
1221 case convolve(fcZero, fcNormal):
1223 sign = rhs.sign ^ subtract;
1226 case convolve(fcZero, fcZero):
1227 /* Sign depends on rounding mode; handled by caller. */
1230 case convolve(fcInfinity, fcInfinity):
1231 /* Differently signed infinities can only be validly
1233 if((sign ^ rhs.sign) != subtract) {
1240 case convolve(fcNormal, fcNormal):
1245 /* Add or subtract two normal numbers. */
1247 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1250 lostFraction lost_fraction;
1253 /* Determine if the operation on the absolute values is effectively
1254 an addition or subtraction. */
1255 subtract ^= (sign ^ rhs.sign) ? true : false;
1257 /* Are we bigger exponent-wise than the RHS? */
1258 bits = exponent - rhs.exponent;
1260 /* Subtraction is more subtle than one might naively expect. */
1262 APFloat temp_rhs(rhs);
1266 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1267 lost_fraction = lfExactlyZero;
1268 } else if (bits > 0) {
1269 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1270 shiftSignificandLeft(1);
1273 lost_fraction = shiftSignificandRight(-bits - 1);
1274 temp_rhs.shiftSignificandLeft(1);
1279 carry = temp_rhs.subtractSignificand
1280 (*this, lost_fraction != lfExactlyZero);
1281 copySignificand(temp_rhs);
1284 carry = subtractSignificand
1285 (temp_rhs, lost_fraction != lfExactlyZero);
1288 /* Invert the lost fraction - it was on the RHS and
1290 if(lost_fraction == lfLessThanHalf)
1291 lost_fraction = lfMoreThanHalf;
1292 else if(lost_fraction == lfMoreThanHalf)
1293 lost_fraction = lfLessThanHalf;
1295 /* The code above is intended to ensure that no borrow is
1300 APFloat temp_rhs(rhs);
1302 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1303 carry = addSignificand(temp_rhs);
1305 lost_fraction = shiftSignificandRight(-bits);
1306 carry = addSignificand(rhs);
1309 /* We have a guard bit; generating a carry cannot happen. */
1313 return lost_fraction;
1317 APFloat::multiplySpecials(const APFloat &rhs)
1319 switch(convolve(category, rhs.category)) {
1323 case convolve(fcNaN, fcZero):
1324 case convolve(fcNaN, fcNormal):
1325 case convolve(fcNaN, fcInfinity):
1326 case convolve(fcNaN, fcNaN):
1329 case convolve(fcZero, fcNaN):
1330 case convolve(fcNormal, fcNaN):
1331 case convolve(fcInfinity, fcNaN):
1333 copySignificand(rhs);
1336 case convolve(fcNormal, fcInfinity):
1337 case convolve(fcInfinity, fcNormal):
1338 case convolve(fcInfinity, fcInfinity):
1339 category = fcInfinity;
1342 case convolve(fcZero, fcNormal):
1343 case convolve(fcNormal, fcZero):
1344 case convolve(fcZero, fcZero):
1348 case convolve(fcZero, fcInfinity):
1349 case convolve(fcInfinity, fcZero):
1353 case convolve(fcNormal, fcNormal):
1359 APFloat::divideSpecials(const APFloat &rhs)
1361 switch(convolve(category, rhs.category)) {
1365 case convolve(fcNaN, fcZero):
1366 case convolve(fcNaN, fcNormal):
1367 case convolve(fcNaN, fcInfinity):
1368 case convolve(fcNaN, fcNaN):
1369 case convolve(fcInfinity, fcZero):
1370 case convolve(fcInfinity, fcNormal):
1371 case convolve(fcZero, fcInfinity):
1372 case convolve(fcZero, fcNormal):
1375 case convolve(fcZero, fcNaN):
1376 case convolve(fcNormal, fcNaN):
1377 case convolve(fcInfinity, fcNaN):
1379 copySignificand(rhs);
1382 case convolve(fcNormal, fcInfinity):
1386 case convolve(fcNormal, fcZero):
1387 category = fcInfinity;
1390 case convolve(fcInfinity, fcInfinity):
1391 case convolve(fcZero, fcZero):
1395 case convolve(fcNormal, fcNormal):
1402 APFloat::changeSign()
1404 /* Look mummy, this one's easy. */
1409 APFloat::clearSign()
1411 /* So is this one. */
1416 APFloat::copySign(const APFloat &rhs)
1422 /* Normalized addition or subtraction. */
1424 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1429 assertArithmeticOK(*semantics);
1431 fs = addOrSubtractSpecials(rhs, subtract);
1433 /* This return code means it was not a simple case. */
1434 if(fs == opDivByZero) {
1435 lostFraction lost_fraction;
1437 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1438 fs = normalize(rounding_mode, lost_fraction);
1440 /* Can only be zero if we lost no fraction. */
1441 assert(category != fcZero || lost_fraction == lfExactlyZero);
1444 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1445 positive zero unless rounding to minus infinity, except that
1446 adding two like-signed zeroes gives that zero. */
1447 if(category == fcZero) {
1448 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1449 sign = (rounding_mode == rmTowardNegative);
1455 /* Normalized addition. */
1457 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1459 return addOrSubtract(rhs, rounding_mode, false);
1462 /* Normalized subtraction. */
1464 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1466 return addOrSubtract(rhs, rounding_mode, true);
1469 /* Normalized multiply. */
1471 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1475 assertArithmeticOK(*semantics);
1477 fs = multiplySpecials(rhs);
1479 if(category == fcNormal) {
1480 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1481 fs = normalize(rounding_mode, lost_fraction);
1482 if(lost_fraction != lfExactlyZero)
1483 fs = (opStatus) (fs | opInexact);
1489 /* Normalized divide. */
1491 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1495 assertArithmeticOK(*semantics);
1497 fs = divideSpecials(rhs);
1499 if(category == fcNormal) {
1500 lostFraction lost_fraction = divideSignificand(rhs);
1501 fs = normalize(rounding_mode, lost_fraction);
1502 if(lost_fraction != lfExactlyZero)
1503 fs = (opStatus) (fs | opInexact);
1509 /* Normalized remainder. This is not currently doing TRT. */
1511 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1515 unsigned int origSign = sign;
1517 assertArithmeticOK(*semantics);
1518 fs = V.divide(rhs, rmNearestTiesToEven);
1519 if (fs == opDivByZero)
1522 int parts = partCount();
1523 integerPart *x = new integerPart[parts];
1524 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1525 rmNearestTiesToEven);
1526 if (fs==opInvalidOp)
1529 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1530 rmNearestTiesToEven);
1531 assert(fs==opOK); // should always work
1533 fs = V.multiply(rhs, rounding_mode);
1534 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1536 fs = subtract(V, rounding_mode);
1537 assert(fs==opOK || fs==opInexact); // likewise
1540 sign = origSign; // IEEE754 requires this
1545 /* Normalized fused-multiply-add. */
1547 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1548 const APFloat &addend,
1549 roundingMode rounding_mode)
1553 assertArithmeticOK(*semantics);
1555 /* Post-multiplication sign, before addition. */
1556 sign ^= multiplicand.sign;
1558 /* If and only if all arguments are normal do we need to do an
1559 extended-precision calculation. */
1560 if(category == fcNormal
1561 && multiplicand.category == fcNormal
1562 && addend.category == fcNormal) {
1563 lostFraction lost_fraction;
1565 lost_fraction = multiplySignificand(multiplicand, &addend);
1566 fs = normalize(rounding_mode, lost_fraction);
1567 if(lost_fraction != lfExactlyZero)
1568 fs = (opStatus) (fs | opInexact);
1570 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1571 positive zero unless rounding to minus infinity, except that
1572 adding two like-signed zeroes gives that zero. */
1573 if(category == fcZero && sign != addend.sign)
1574 sign = (rounding_mode == rmTowardNegative);
1576 fs = multiplySpecials(multiplicand);
1578 /* FS can only be opOK or opInvalidOp. There is no more work
1579 to do in the latter case. The IEEE-754R standard says it is
1580 implementation-defined in this case whether, if ADDEND is a
1581 quiet NaN, we raise invalid op; this implementation does so.
1583 If we need to do the addition we can do so with normal
1586 fs = addOrSubtract(addend, rounding_mode, false);
1592 /* Comparison requires normalized numbers. */
1594 APFloat::compare(const APFloat &rhs) const
1598 assertArithmeticOK(*semantics);
1599 assert(semantics == rhs.semantics);
1601 switch(convolve(category, rhs.category)) {
1605 case convolve(fcNaN, fcZero):
1606 case convolve(fcNaN, fcNormal):
1607 case convolve(fcNaN, fcInfinity):
1608 case convolve(fcNaN, fcNaN):
1609 case convolve(fcZero, fcNaN):
1610 case convolve(fcNormal, fcNaN):
1611 case convolve(fcInfinity, fcNaN):
1612 return cmpUnordered;
1614 case convolve(fcInfinity, fcNormal):
1615 case convolve(fcInfinity, fcZero):
1616 case convolve(fcNormal, fcZero):
1620 return cmpGreaterThan;
1622 case convolve(fcNormal, fcInfinity):
1623 case convolve(fcZero, fcInfinity):
1624 case convolve(fcZero, fcNormal):
1626 return cmpGreaterThan;
1630 case convolve(fcInfinity, fcInfinity):
1631 if(sign == rhs.sign)
1636 return cmpGreaterThan;
1638 case convolve(fcZero, fcZero):
1641 case convolve(fcNormal, fcNormal):
1645 /* Two normal numbers. Do they have the same sign? */
1646 if(sign != rhs.sign) {
1648 result = cmpLessThan;
1650 result = cmpGreaterThan;
1652 /* Compare absolute values; invert result if negative. */
1653 result = compareAbsoluteValue(rhs);
1656 if(result == cmpLessThan)
1657 result = cmpGreaterThan;
1658 else if(result == cmpGreaterThan)
1659 result = cmpLessThan;
1667 APFloat::convert(const fltSemantics &toSemantics,
1668 roundingMode rounding_mode)
1670 lostFraction lostFraction;
1671 unsigned int newPartCount, oldPartCount;
1674 assertArithmeticOK(*semantics);
1675 lostFraction = lfExactlyZero;
1676 newPartCount = partCountForBits(toSemantics.precision + 1);
1677 oldPartCount = partCount();
1679 /* Handle storage complications. If our new form is wider,
1680 re-allocate our bit pattern into wider storage. If it is
1681 narrower, we ignore the excess parts, but if narrowing to a
1682 single part we need to free the old storage.
1683 Be careful not to reference significandParts for zeroes
1684 and infinities, since it aborts. */
1685 if (newPartCount > oldPartCount) {
1686 integerPart *newParts;
1687 newParts = new integerPart[newPartCount];
1688 APInt::tcSet(newParts, 0, newPartCount);
1689 if (category==fcNormal || category==fcNaN)
1690 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1692 significand.parts = newParts;
1693 } else if (newPartCount < oldPartCount) {
1694 /* Capture any lost fraction through truncation of parts so we get
1695 correct rounding whilst normalizing. */
1696 if (category==fcNormal)
1697 lostFraction = lostFractionThroughTruncation
1698 (significandParts(), oldPartCount, toSemantics.precision);
1699 if (newPartCount == 1) {
1700 integerPart newPart = 0;
1701 if (category==fcNormal || category==fcNaN)
1702 newPart = significandParts()[0];
1704 significand.part = newPart;
1708 if(category == fcNormal) {
1709 /* Re-interpret our bit-pattern. */
1710 exponent += toSemantics.precision - semantics->precision;
1711 semantics = &toSemantics;
1712 fs = normalize(rounding_mode, lostFraction);
1713 } else if (category == fcNaN) {
1714 int shift = toSemantics.precision - semantics->precision;
1715 // No normalization here, just truncate
1717 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1719 APInt::tcShiftRight(significandParts(), newPartCount, -shift);
1720 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1721 // does not give you back the same bits. This is dubious, and we
1722 // don't currently do it. You're really supposed to get
1723 // an invalid operation signal at runtime, but nobody does that.
1724 semantics = &toSemantics;
1727 semantics = &toSemantics;
1734 /* Convert a floating point number to an integer according to the
1735 rounding mode. If the rounded integer value is out of range this
1736 returns an invalid operation exception and the contents of the
1737 destination parts are unspecified. If the rounded value is in
1738 range but the floating point number is not the exact integer, the C
1739 standard doesn't require an inexact exception to be raised. IEEE
1740 854 does require it so we do that.
1742 Note that for conversions to integer type the C standard requires
1743 round-to-zero to always be used. */
1745 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1747 roundingMode rounding_mode) const
1749 lostFraction lost_fraction;
1750 const integerPart *src;
1751 unsigned int dstPartsCount, truncatedBits;
1753 assertArithmeticOK(*semantics);
1755 /* Handle the three special cases first. */
1756 if(category == fcInfinity || category == fcNaN)
1759 dstPartsCount = partCountForBits(width);
1761 if(category == fcZero) {
1762 APInt::tcSet(parts, 0, dstPartsCount);
1766 src = significandParts();
1768 /* Step 1: place our absolute value, with any fraction truncated, in
1771 /* Our absolute value is less than one; truncate everything. */
1772 APInt::tcSet(parts, 0, dstPartsCount);
1773 truncatedBits = semantics->precision;
1775 /* We want the most significant (exponent + 1) bits; the rest are
1777 unsigned int bits = exponent + 1U;
1779 /* Hopelessly large in magnitude? */
1783 if (bits < semantics->precision) {
1784 /* We truncate (semantics->precision - bits) bits. */
1785 truncatedBits = semantics->precision - bits;
1786 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1788 /* We want at least as many bits as are available. */
1789 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1790 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1795 /* Step 2: work out any lost fraction, and increment the absolute
1796 value if we would round away from zero. */
1797 if (truncatedBits) {
1798 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1800 if (lost_fraction != lfExactlyZero
1801 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1802 if (APInt::tcIncrement(parts, dstPartsCount))
1803 return opInvalidOp; /* Overflow. */
1806 lost_fraction = lfExactlyZero;
1809 /* Step 3: check if we fit in the destination. */
1810 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1814 /* Negative numbers cannot be represented as unsigned. */
1818 /* It takes omsb bits to represent the unsigned integer value.
1819 We lose a bit for the sign, but care is needed as the
1820 maximally negative integer is a special case. */
1821 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1824 /* This case can happen because of rounding. */
1829 APInt::tcNegate (parts, dstPartsCount);
1831 if (omsb >= width + !isSigned)
1835 if (lost_fraction == lfExactlyZero)
1841 /* Same as convertToSignExtendedInteger, except we provide
1842 deterministic values in case of an invalid operation exception,
1843 namely zero for NaNs and the minimal or maximal value respectively
1844 for underflow or overflow. */
1846 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1848 roundingMode rounding_mode) const
1852 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode);
1854 if (fs == opInvalidOp) {
1855 unsigned int bits, dstPartsCount;
1857 dstPartsCount = partCountForBits(width);
1859 if (category == fcNaN)
1864 bits = width - isSigned;
1866 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
1867 if (sign && isSigned)
1868 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
1874 /* Convert an unsigned integer SRC to a floating point number,
1875 rounding according to ROUNDING_MODE. The sign of the floating
1876 point number is not modified. */
1878 APFloat::convertFromUnsignedParts(const integerPart *src,
1879 unsigned int srcCount,
1880 roundingMode rounding_mode)
1882 unsigned int omsb, precision, dstCount;
1884 lostFraction lost_fraction;
1886 assertArithmeticOK(*semantics);
1887 category = fcNormal;
1888 omsb = APInt::tcMSB(src, srcCount) + 1;
1889 dst = significandParts();
1890 dstCount = partCount();
1891 precision = semantics->precision;
1893 /* We want the most significant PRECISON bits of SRC. There may not
1894 be that many; extract what we can. */
1895 if (precision <= omsb) {
1896 exponent = omsb - 1;
1897 lost_fraction = lostFractionThroughTruncation(src, srcCount,
1899 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
1901 exponent = precision - 1;
1902 lost_fraction = lfExactlyZero;
1903 APInt::tcExtract(dst, dstCount, src, omsb, 0);
1906 return normalize(rounding_mode, lost_fraction);
1909 /* Convert a two's complement integer SRC to a floating point number,
1910 rounding according to ROUNDING_MODE. ISSIGNED is true if the
1911 integer is signed, in which case it must be sign-extended. */
1913 APFloat::convertFromSignExtendedInteger(const integerPart *src,
1914 unsigned int srcCount,
1916 roundingMode rounding_mode)
1920 assertArithmeticOK(*semantics);
1922 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
1925 /* If we're signed and negative negate a copy. */
1927 copy = new integerPart[srcCount];
1928 APInt::tcAssign(copy, src, srcCount);
1929 APInt::tcNegate(copy, srcCount);
1930 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
1934 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
1940 /* FIXME: should this just take a const APInt reference? */
1942 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
1943 unsigned int width, bool isSigned,
1944 roundingMode rounding_mode)
1946 unsigned int partCount = partCountForBits(width);
1947 APInt api = APInt(width, partCount, parts);
1950 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1955 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
1959 APFloat::convertFromHexadecimalString(const char *p,
1960 roundingMode rounding_mode)
1962 lostFraction lost_fraction;
1963 integerPart *significand;
1964 unsigned int bitPos, partsCount;
1965 const char *dot, *firstSignificantDigit;
1969 category = fcNormal;
1971 significand = significandParts();
1972 partsCount = partCount();
1973 bitPos = partsCount * integerPartWidth;
1975 /* Skip leading zeroes and any (hexa)decimal point. */
1976 p = skipLeadingZeroesAndAnyDot(p, &dot);
1977 firstSignificantDigit = p;
1980 integerPart hex_value;
1987 hex_value = hexDigitValue(*p);
1988 if(hex_value == -1U) {
1989 lost_fraction = lfExactlyZero;
1995 /* Store the number whilst 4-bit nibbles remain. */
1998 hex_value <<= bitPos % integerPartWidth;
1999 significand[bitPos / integerPartWidth] |= hex_value;
2001 lost_fraction = trailingHexadecimalFraction(p, hex_value);
2002 while(hexDigitValue(*p) != -1U)
2008 /* Hex floats require an exponent but not a hexadecimal point. */
2009 assert(*p == 'p' || *p == 'P');
2011 /* Ignore the exponent if we are zero. */
2012 if(p != firstSignificantDigit) {
2015 /* Implicit hexadecimal point? */
2019 /* Calculate the exponent adjustment implicit in the number of
2020 significant digits. */
2021 expAdjustment = dot - firstSignificantDigit;
2022 if(expAdjustment < 0)
2024 expAdjustment = expAdjustment * 4 - 1;
2026 /* Adjust for writing the significand starting at the most
2027 significant nibble. */
2028 expAdjustment += semantics->precision;
2029 expAdjustment -= partsCount * integerPartWidth;
2031 /* Adjust for the given exponent. */
2032 exponent = totalExponent(p, expAdjustment);
2035 return normalize(rounding_mode, lost_fraction);
2039 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2040 unsigned sigPartCount, int exp,
2041 roundingMode rounding_mode)
2043 unsigned int parts, pow5PartCount;
2044 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2045 integerPart pow5Parts[maxPowerOfFiveParts];
2048 isNearest = (rounding_mode == rmNearestTiesToEven
2049 || rounding_mode == rmNearestTiesToAway);
2051 parts = partCountForBits(semantics->precision + 11);
2053 /* Calculate pow(5, abs(exp)). */
2054 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2056 for (;; parts *= 2) {
2057 opStatus sigStatus, powStatus;
2058 unsigned int excessPrecision, truncatedBits;
2060 calcSemantics.precision = parts * integerPartWidth - 1;
2061 excessPrecision = calcSemantics.precision - semantics->precision;
2062 truncatedBits = excessPrecision;
2064 APFloat decSig(calcSemantics, fcZero, sign);
2065 APFloat pow5(calcSemantics, fcZero, false);
2067 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2068 rmNearestTiesToEven);
2069 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2070 rmNearestTiesToEven);
2071 /* Add exp, as 10^n = 5^n * 2^n. */
2072 decSig.exponent += exp;
2074 lostFraction calcLostFraction;
2075 integerPart HUerr, HUdistance, powHUerr;
2078 /* multiplySignificand leaves the precision-th bit set to 1. */
2079 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2080 powHUerr = powStatus != opOK;
2082 calcLostFraction = decSig.divideSignificand(pow5);
2083 /* Denormal numbers have less precision. */
2084 if (decSig.exponent < semantics->minExponent) {
2085 excessPrecision += (semantics->minExponent - decSig.exponent);
2086 truncatedBits = excessPrecision;
2087 if (excessPrecision > calcSemantics.precision)
2088 excessPrecision = calcSemantics.precision;
2090 /* Extra half-ulp lost in reciprocal of exponent. */
2091 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0: 2;
2094 /* Both multiplySignificand and divideSignificand return the
2095 result with the integer bit set. */
2096 assert (APInt::tcExtractBit
2097 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2099 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2101 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2102 excessPrecision, isNearest);
2104 /* Are we guaranteed to round correctly if we truncate? */
2105 if (HUdistance >= HUerr) {
2106 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2107 calcSemantics.precision - excessPrecision,
2109 /* Take the exponent of decSig. If we tcExtract-ed less bits
2110 above we must adjust our exponent to compensate for the
2111 implicit right shift. */
2112 exponent = (decSig.exponent + semantics->precision
2113 - (calcSemantics.precision - excessPrecision));
2114 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2117 return normalize(rounding_mode, calcLostFraction);
2123 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
2128 /* Scan the text. */
2129 interpretDecimal(p, &D);
2131 /* Handle the quick cases. First the case of no significant digits,
2132 i.e. zero, and then exponents that are obviously too large or too
2133 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2134 definitely overflows if
2136 (exp - 1) * L >= maxExponent
2138 and definitely underflows to zero where
2140 (exp + 1) * L <= minExponent - precision
2142 With integer arithmetic the tightest bounds for L are
2144 93/28 < L < 196/59 [ numerator <= 256 ]
2145 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2148 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2151 } else if ((D.normalizedExponent + 1) * 28738
2152 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2153 /* Underflow to zero and round. */
2155 fs = normalize(rounding_mode, lfLessThanHalf);
2156 } else if ((D.normalizedExponent - 1) * 42039
2157 >= 12655 * semantics->maxExponent) {
2158 /* Overflow and round. */
2159 fs = handleOverflow(rounding_mode);
2161 integerPart *decSignificand;
2162 unsigned int partCount;
2164 /* A tight upper bound on number of bits required to hold an
2165 N-digit decimal integer is N * 196 / 59. Allocate enough space
2166 to hold the full significand, and an extra part required by
2168 partCount = (D.lastSigDigit - D.firstSigDigit) + 1;
2169 partCount = partCountForBits(1 + 196 * partCount / 59);
2170 decSignificand = new integerPart[partCount + 1];
2173 /* Convert to binary efficiently - we do almost all multiplication
2174 in an integerPart. When this would overflow do we do a single
2175 bignum multiplication, and then revert again to multiplication
2176 in an integerPart. */
2178 integerPart decValue, val, multiplier;
2187 decValue = decDigitValue(*p++);
2189 val = val * 10 + decValue;
2190 /* The maximum number that can be multiplied by ten with any
2191 digit added without overflowing an integerPart. */
2192 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2194 /* Multiply out the current part. */
2195 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2196 partCount, partCount + 1, false);
2198 /* If we used another part (likely but not guaranteed), increase
2200 if (decSignificand[partCount])
2202 } while (p <= D.lastSigDigit);
2204 category = fcNormal;
2205 fs = roundSignificandWithExponent(decSignificand, partCount,
2206 D.exponent, rounding_mode);
2208 delete [] decSignificand;
2215 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2217 assertArithmeticOK(*semantics);
2219 /* Handle a leading minus sign. */
2225 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2226 return convertFromHexadecimalString(p + 2, rounding_mode);
2228 return convertFromDecimalString(p, rounding_mode);
2231 /* Write out a hexadecimal representation of the floating point value
2232 to DST, which must be of sufficient size, in the C99 form
2233 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2234 excluding the terminating NUL.
2236 If UPPERCASE, the output is in upper case, otherwise in lower case.
2238 HEXDIGITS digits appear altogether, rounding the value if
2239 necessary. If HEXDIGITS is 0, the minimal precision to display the
2240 number precisely is used instead. If nothing would appear after
2241 the decimal point it is suppressed.
2243 The decimal exponent is always printed and has at least one digit.
2244 Zero values display an exponent of zero. Infinities and NaNs
2245 appear as "infinity" or "nan" respectively.
2247 The above rules are as specified by C99. There is ambiguity about
2248 what the leading hexadecimal digit should be. This implementation
2249 uses whatever is necessary so that the exponent is displayed as
2250 stored. This implies the exponent will fall within the IEEE format
2251 range, and the leading hexadecimal digit will be 0 (for denormals),
2252 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2253 any other digits zero).
2256 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2257 bool upperCase, roundingMode rounding_mode) const
2261 assertArithmeticOK(*semantics);
2269 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2270 dst += sizeof infinityL - 1;
2274 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2275 dst += sizeof NaNU - 1;
2280 *dst++ = upperCase ? 'X': 'x';
2282 if (hexDigits > 1) {
2284 memset (dst, '0', hexDigits - 1);
2285 dst += hexDigits - 1;
2287 *dst++ = upperCase ? 'P': 'p';
2292 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2301 /* Does the hard work of outputting the correctly rounded hexadecimal
2302 form of a normal floating point number with the specified number of
2303 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2304 digits necessary to print the value precisely is output. */
2306 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2308 roundingMode rounding_mode) const
2310 unsigned int count, valueBits, shift, partsCount, outputDigits;
2311 const char *hexDigitChars;
2312 const integerPart *significand;
2317 *dst++ = upperCase ? 'X': 'x';
2320 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2322 significand = significandParts();
2323 partsCount = partCount();
2325 /* +3 because the first digit only uses the single integer bit, so
2326 we have 3 virtual zero most-significant-bits. */
2327 valueBits = semantics->precision + 3;
2328 shift = integerPartWidth - valueBits % integerPartWidth;
2330 /* The natural number of digits required ignoring trailing
2331 insignificant zeroes. */
2332 outputDigits = (valueBits - significandLSB () + 3) / 4;
2334 /* hexDigits of zero means use the required number for the
2335 precision. Otherwise, see if we are truncating. If we are,
2336 find out if we need to round away from zero. */
2338 if (hexDigits < outputDigits) {
2339 /* We are dropping non-zero bits, so need to check how to round.
2340 "bits" is the number of dropped bits. */
2342 lostFraction fraction;
2344 bits = valueBits - hexDigits * 4;
2345 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2346 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2348 outputDigits = hexDigits;
2351 /* Write the digits consecutively, and start writing in the location
2352 of the hexadecimal point. We move the most significant digit
2353 left and add the hexadecimal point later. */
2356 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2358 while (outputDigits && count) {
2361 /* Put the most significant integerPartWidth bits in "part". */
2362 if (--count == partsCount)
2363 part = 0; /* An imaginary higher zero part. */
2365 part = significand[count] << shift;
2368 part |= significand[count - 1] >> (integerPartWidth - shift);
2370 /* Convert as much of "part" to hexdigits as we can. */
2371 unsigned int curDigits = integerPartWidth / 4;
2373 if (curDigits > outputDigits)
2374 curDigits = outputDigits;
2375 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2376 outputDigits -= curDigits;
2382 /* Note that hexDigitChars has a trailing '0'. */
2385 *q = hexDigitChars[hexDigitValue (*q) + 1];
2386 } while (*q == '0');
2389 /* Add trailing zeroes. */
2390 memset (dst, '0', outputDigits);
2391 dst += outputDigits;
2394 /* Move the most significant digit to before the point, and if there
2395 is something after the decimal point add it. This must come
2396 after rounding above. */
2403 /* Finally output the exponent. */
2404 *dst++ = upperCase ? 'P': 'p';
2406 return writeSignedDecimal (dst, exponent);
2409 // For good performance it is desirable for different APFloats
2410 // to produce different integers.
2412 APFloat::getHashValue() const
2414 if (category==fcZero) return sign<<8 | semantics->precision ;
2415 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2416 else if (category==fcNaN) return 1<<10 | semantics->precision;
2418 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2419 const integerPart* p = significandParts();
2420 for (int i=partCount(); i>0; i--, p++)
2421 hash ^= ((uint32_t)*p) ^ (*p)>>32;
2426 // Conversion from APFloat to/from host float/double. It may eventually be
2427 // possible to eliminate these and have everybody deal with APFloats, but that
2428 // will take a while. This approach will not easily extend to long double.
2429 // Current implementation requires integerPartWidth==64, which is correct at
2430 // the moment but could be made more general.
2432 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2433 // the actual IEEE respresentations. We compensate for that here.
2436 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2438 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended);
2439 assert (partCount()==2);
2441 uint64_t myexponent, mysignificand;
2443 if (category==fcNormal) {
2444 myexponent = exponent+16383; //bias
2445 mysignificand = significandParts()[0];
2446 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2447 myexponent = 0; // denormal
2448 } else if (category==fcZero) {
2451 } else if (category==fcInfinity) {
2452 myexponent = 0x7fff;
2453 mysignificand = 0x8000000000000000ULL;
2455 assert(category == fcNaN && "Unknown category");
2456 myexponent = 0x7fff;
2457 mysignificand = significandParts()[0];
2461 words[0] = (((uint64_t)sign & 1) << 63) |
2462 ((myexponent & 0x7fff) << 48) |
2463 ((mysignificand >>16) & 0xffffffffffffLL);
2464 words[1] = mysignificand & 0xffff;
2465 return APInt(80, 2, words);
2469 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2471 assert(semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble);
2472 assert (partCount()==2);
2474 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2476 if (category==fcNormal) {
2477 myexponent = exponent + 1023; //bias
2478 myexponent2 = exponent2 + 1023;
2479 mysignificand = significandParts()[0];
2480 mysignificand2 = significandParts()[1];
2481 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2482 myexponent = 0; // denormal
2483 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2484 myexponent2 = 0; // denormal
2485 } else if (category==fcZero) {
2490 } else if (category==fcInfinity) {
2496 assert(category == fcNaN && "Unknown category");
2498 mysignificand = significandParts()[0];
2499 myexponent2 = exponent2;
2500 mysignificand2 = significandParts()[1];
2504 words[0] = (((uint64_t)sign & 1) << 63) |
2505 ((myexponent & 0x7ff) << 52) |
2506 (mysignificand & 0xfffffffffffffLL);
2507 words[1] = (((uint64_t)sign2 & 1) << 63) |
2508 ((myexponent2 & 0x7ff) << 52) |
2509 (mysignificand2 & 0xfffffffffffffLL);
2510 return APInt(128, 2, words);
2514 APFloat::convertDoubleAPFloatToAPInt() const
2516 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2517 assert (partCount()==1);
2519 uint64_t myexponent, mysignificand;
2521 if (category==fcNormal) {
2522 myexponent = exponent+1023; //bias
2523 mysignificand = *significandParts();
2524 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2525 myexponent = 0; // denormal
2526 } else if (category==fcZero) {
2529 } else if (category==fcInfinity) {
2533 assert(category == fcNaN && "Unknown category!");
2535 mysignificand = *significandParts();
2538 return APInt(64, (((((uint64_t)sign & 1) << 63) |
2539 ((myexponent & 0x7ff) << 52) |
2540 (mysignificand & 0xfffffffffffffLL))));
2544 APFloat::convertFloatAPFloatToAPInt() const
2546 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2547 assert (partCount()==1);
2549 uint32_t myexponent, mysignificand;
2551 if (category==fcNormal) {
2552 myexponent = exponent+127; //bias
2553 mysignificand = *significandParts();
2554 if (myexponent == 1 && !(mysignificand & 0x800000))
2555 myexponent = 0; // denormal
2556 } else if (category==fcZero) {
2559 } else if (category==fcInfinity) {
2563 assert(category == fcNaN && "Unknown category!");
2565 mysignificand = *significandParts();
2568 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2569 (mysignificand & 0x7fffff)));
2572 // This function creates an APInt that is just a bit map of the floating
2573 // point constant as it would appear in memory. It is not a conversion,
2574 // and treating the result as a normal integer is unlikely to be useful.
2577 APFloat::convertToAPInt() const
2579 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle)
2580 return convertFloatAPFloatToAPInt();
2582 if (semantics == (const llvm::fltSemantics* const)&IEEEdouble)
2583 return convertDoubleAPFloatToAPInt();
2585 if (semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble)
2586 return convertPPCDoubleDoubleAPFloatToAPInt();
2588 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended &&
2590 return convertF80LongDoubleAPFloatToAPInt();
2594 APFloat::convertToFloat() const
2596 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle);
2597 APInt api = convertToAPInt();
2598 return api.bitsToFloat();
2602 APFloat::convertToDouble() const
2604 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble);
2605 APInt api = convertToAPInt();
2606 return api.bitsToDouble();
2609 /// Integer bit is explicit in this format. Current Intel book does not
2610 /// define meaning of:
2611 /// exponent = all 1's, integer bit not set.
2612 /// exponent = 0, integer bit set. (formerly "psuedodenormals")
2613 /// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals")
2615 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2617 assert(api.getBitWidth()==80);
2618 uint64_t i1 = api.getRawData()[0];
2619 uint64_t i2 = api.getRawData()[1];
2620 uint64_t myexponent = (i1 >> 48) & 0x7fff;
2621 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
2624 initialize(&APFloat::x87DoubleExtended);
2625 assert(partCount()==2);
2628 if (myexponent==0 && mysignificand==0) {
2629 // exponent, significand meaningless
2631 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2632 // exponent, significand meaningless
2633 category = fcInfinity;
2634 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2635 // exponent meaningless
2637 significandParts()[0] = mysignificand;
2638 significandParts()[1] = 0;
2640 category = fcNormal;
2641 exponent = myexponent - 16383;
2642 significandParts()[0] = mysignificand;
2643 significandParts()[1] = 0;
2644 if (myexponent==0) // denormal
2650 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2652 assert(api.getBitWidth()==128);
2653 uint64_t i1 = api.getRawData()[0];
2654 uint64_t i2 = api.getRawData()[1];
2655 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2656 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2657 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2658 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2660 initialize(&APFloat::PPCDoubleDouble);
2661 assert(partCount()==2);
2665 if (myexponent==0 && mysignificand==0) {
2666 // exponent, significand meaningless
2667 // exponent2 and significand2 are required to be 0; we don't check
2669 } else if (myexponent==0x7ff && mysignificand==0) {
2670 // exponent, significand meaningless
2671 // exponent2 and significand2 are required to be 0; we don't check
2672 category = fcInfinity;
2673 } else if (myexponent==0x7ff && mysignificand!=0) {
2674 // exponent meaningless. So is the whole second word, but keep it
2677 exponent2 = myexponent2;
2678 significandParts()[0] = mysignificand;
2679 significandParts()[1] = mysignificand2;
2681 category = fcNormal;
2682 // Note there is no category2; the second word is treated as if it is
2683 // fcNormal, although it might be something else considered by itself.
2684 exponent = myexponent - 1023;
2685 exponent2 = myexponent2 - 1023;
2686 significandParts()[0] = mysignificand;
2687 significandParts()[1] = mysignificand2;
2688 if (myexponent==0) // denormal
2691 significandParts()[0] |= 0x10000000000000LL; // integer bit
2695 significandParts()[1] |= 0x10000000000000LL; // integer bit
2700 APFloat::initFromDoubleAPInt(const APInt &api)
2702 assert(api.getBitWidth()==64);
2703 uint64_t i = *api.getRawData();
2704 uint64_t myexponent = (i >> 52) & 0x7ff;
2705 uint64_t mysignificand = i & 0xfffffffffffffLL;
2707 initialize(&APFloat::IEEEdouble);
2708 assert(partCount()==1);
2711 if (myexponent==0 && mysignificand==0) {
2712 // exponent, significand meaningless
2714 } else if (myexponent==0x7ff && mysignificand==0) {
2715 // exponent, significand meaningless
2716 category = fcInfinity;
2717 } else if (myexponent==0x7ff && mysignificand!=0) {
2718 // exponent meaningless
2720 *significandParts() = mysignificand;
2722 category = fcNormal;
2723 exponent = myexponent - 1023;
2724 *significandParts() = mysignificand;
2725 if (myexponent==0) // denormal
2728 *significandParts() |= 0x10000000000000LL; // integer bit
2733 APFloat::initFromFloatAPInt(const APInt & api)
2735 assert(api.getBitWidth()==32);
2736 uint32_t i = (uint32_t)*api.getRawData();
2737 uint32_t myexponent = (i >> 23) & 0xff;
2738 uint32_t mysignificand = i & 0x7fffff;
2740 initialize(&APFloat::IEEEsingle);
2741 assert(partCount()==1);
2744 if (myexponent==0 && mysignificand==0) {
2745 // exponent, significand meaningless
2747 } else if (myexponent==0xff && mysignificand==0) {
2748 // exponent, significand meaningless
2749 category = fcInfinity;
2750 } else if (myexponent==0xff && mysignificand!=0) {
2751 // sign, exponent, significand meaningless
2753 *significandParts() = mysignificand;
2755 category = fcNormal;
2756 exponent = myexponent - 127; //bias
2757 *significandParts() = mysignificand;
2758 if (myexponent==0) // denormal
2761 *significandParts() |= 0x800000; // integer bit
2765 /// Treat api as containing the bits of a floating point number. Currently
2766 /// we infer the floating point type from the size of the APInt. The
2767 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2768 /// when the size is anything else).
2770 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2772 if (api.getBitWidth() == 32)
2773 return initFromFloatAPInt(api);
2774 else if (api.getBitWidth()==64)
2775 return initFromDoubleAPInt(api);
2776 else if (api.getBitWidth()==80)
2777 return initFromF80LongDoubleAPInt(api);
2778 else if (api.getBitWidth()==128 && !isIEEE)
2779 return initFromPPCDoubleDoubleAPInt(api);
2784 APFloat::APFloat(const APInt& api, bool isIEEE)
2786 initFromAPInt(api, isIEEE);
2789 APFloat::APFloat(float f)
2791 APInt api = APInt(32, 0);
2792 initFromAPInt(api.floatToBits(f));
2795 APFloat::APFloat(double d)
2797 APInt api = APInt(64, 0);
2798 initFromAPInt(api.doubleToBits(d));