1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Neil Booth and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
17 #include "llvm/ADT/APFloat.h"
18 #include "llvm/Support/MathExtras.h"
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
30 /* Represents floating point arithmetic semantics. */
32 /* The largest E such that 2^E is representable; this matches the
33 definition of IEEE 754. */
34 exponent_t maxExponent;
36 /* The smallest E such that 2^E is a normalized number; this
37 matches the definition of IEEE 754. */
38 exponent_t minExponent;
40 /* Number of bits in the significand. This includes the integer
42 unsigned int precision;
45 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
46 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
47 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
48 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
49 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
51 // The PowerPC format consists of two doubles. It does not map cleanly
52 // onto the usual format above. For now only storage of constants of
53 // this type is supported, no arithmetic.
54 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106 };
56 /* A tight upper bound on number of parts required to hold the value
59 power * 1024 / (441 * integerPartWidth) + 1
61 However, whilst the result may require only this many parts,
62 because we are multiplying two values to get it, the
63 multiplication may require an extra part with the excess part
64 being zero (consider the trivial case of 1 * 1, tcFullMultiply
65 requires two parts to hold the single-part result). So we add an
66 extra one to guarantee enough space whilst multiplying. */
67 const unsigned int maxExponent = 16383;
68 const unsigned int maxPrecision = 113;
69 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
70 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 1024)
71 / (441 * integerPartWidth));
74 /* Put a bunch of private, handy routines in an anonymous namespace. */
78 partCountForBits(unsigned int bits)
80 return ((bits) + integerPartWidth - 1) / integerPartWidth;
84 digitValue(unsigned int c)
96 hexDigitValue(unsigned int c)
115 /* This is ugly and needs cleaning up, but I don't immediately see
116 how whilst remaining safe. */
118 totalExponent(const char *p, int exponentAdjustment)
120 integerPart unsignedExponent;
121 bool negative, overflow;
124 /* Move past the exponent letter and sign to the digits. */
126 negative = *p == '-';
127 if(*p == '-' || *p == '+')
130 unsignedExponent = 0;
135 value = digitValue(*p);
140 unsignedExponent = unsignedExponent * 10 + value;
141 if(unsignedExponent > 65535)
145 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
149 exponent = unsignedExponent;
151 exponent = -exponent;
152 exponent += exponentAdjustment;
153 if(exponent > 65535 || exponent < -65536)
158 exponent = negative ? -65536: 65535;
164 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
179 /* Return the trailing fraction of a hexadecimal number.
180 DIGITVALUE is the first hex digit of the fraction, P points to
183 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
185 unsigned int hexDigit;
187 /* If the first trailing digit isn't 0 or 8 we can work out the
188 fraction immediately. */
190 return lfMoreThanHalf;
191 else if(digitValue < 8 && digitValue > 0)
192 return lfLessThanHalf;
194 /* Otherwise we need to find the first non-zero digit. */
198 hexDigit = hexDigitValue(*p);
200 /* If we ran off the end it is exactly zero or one-half, otherwise
203 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
205 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
208 /* Return the fraction lost were a bignum truncated losing the least
209 significant BITS bits. */
211 lostFractionThroughTruncation(const integerPart *parts,
212 unsigned int partCount,
217 lsb = APInt::tcLSB(parts, partCount);
219 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
221 return lfExactlyZero;
223 return lfExactlyHalf;
224 if(bits <= partCount * integerPartWidth
225 && APInt::tcExtractBit(parts, bits - 1))
226 return lfMoreThanHalf;
228 return lfLessThanHalf;
231 /* Shift DST right BITS bits noting lost fraction. */
233 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
235 lostFraction lost_fraction;
237 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
239 APInt::tcShiftRight(dst, parts, bits);
241 return lost_fraction;
244 /* Combine the effect of two lost fractions. */
246 combineLostFractions(lostFraction moreSignificant,
247 lostFraction lessSignificant)
249 if(lessSignificant != lfExactlyZero) {
250 if(moreSignificant == lfExactlyZero)
251 moreSignificant = lfLessThanHalf;
252 else if(moreSignificant == lfExactlyHalf)
253 moreSignificant = lfMoreThanHalf;
256 return moreSignificant;
259 /* The error from the true value, in half-ulps, on multiplying two
260 floating point numbers, which differ from the value they
261 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
262 than the returned value.
264 See "How to Read Floating Point Numbers Accurately" by William D
267 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
269 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
271 if (HUerr1 + HUerr2 == 0)
272 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
274 return inexactMultiply + 2 * (HUerr1 + HUerr2);
277 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
278 when the least significant BITS are truncated. BITS cannot be
281 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
283 unsigned int count, partBits;
284 integerPart part, boundary;
289 count = bits / integerPartWidth;
290 partBits = bits % integerPartWidth + 1;
292 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
295 boundary = (integerPart) 1 << (partBits - 1);
300 if (part - boundary <= boundary - part)
301 return part - boundary;
303 return boundary - part;
306 if (part == boundary) {
309 return ~(integerPart) 0; /* A lot. */
312 } else if (part == boundary - 1) {
315 return ~(integerPart) 0; /* A lot. */
320 return ~(integerPart) 0; /* A lot. */
323 /* Place pow(5, power) in DST, and return the number of parts used.
324 DST must be at least one part larger than size of the answer. */
326 powerOf5(integerPart *dst, unsigned int power)
328 static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
330 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
331 static unsigned int partsCount[16] = { 1 };
333 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
336 assert(power <= maxExponent);
341 *p1 = firstEightPowers[power & 7];
347 for (unsigned int n = 0; power; power >>= 1, n++) {
352 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
354 pc = partsCount[n - 1];
355 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
357 if (pow5[pc - 1] == 0)
365 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
367 if (p2[result - 1] == 0)
370 /* Now result is in p1 with partsCount parts and p2 is scratch
372 tmp = p1, p1 = p2, p2 = tmp;
379 APInt::tcAssign(dst, p1, result);
384 /* Zero at the end to avoid modular arithmetic when adding one; used
385 when rounding up during hexadecimal output. */
386 static const char hexDigitsLower[] = "0123456789abcdef0";
387 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
388 static const char infinityL[] = "infinity";
389 static const char infinityU[] = "INFINITY";
390 static const char NaNL[] = "nan";
391 static const char NaNU[] = "NAN";
393 /* Write out an integerPart in hexadecimal, starting with the most
394 significant nibble. Write out exactly COUNT hexdigits, return
397 partAsHex (char *dst, integerPart part, unsigned int count,
398 const char *hexDigitChars)
400 unsigned int result = count;
402 assert (count != 0 && count <= integerPartWidth / 4);
404 part >>= (integerPartWidth - 4 * count);
406 dst[count] = hexDigitChars[part & 0xf];
413 /* Write out an unsigned decimal integer. */
415 writeUnsignedDecimal (char *dst, unsigned int n)
431 /* Write out a signed decimal integer. */
433 writeSignedDecimal (char *dst, int value)
437 dst = writeUnsignedDecimal(dst, -(unsigned) value);
439 dst = writeUnsignedDecimal(dst, value);
447 APFloat::initialize(const fltSemantics *ourSemantics)
451 semantics = ourSemantics;
454 significand.parts = new integerPart[count];
458 APFloat::freeSignificand()
461 delete [] significand.parts;
465 APFloat::assign(const APFloat &rhs)
467 assert(semantics == rhs.semantics);
470 category = rhs.category;
471 exponent = rhs.exponent;
473 exponent2 = rhs.exponent2;
474 if(category == fcNormal || category == fcNaN)
475 copySignificand(rhs);
479 APFloat::copySignificand(const APFloat &rhs)
481 assert(category == fcNormal || category == fcNaN);
482 assert(rhs.partCount() >= partCount());
484 APInt::tcAssign(significandParts(), rhs.significandParts(),
489 APFloat::operator=(const APFloat &rhs)
492 if(semantics != rhs.semantics) {
494 initialize(rhs.semantics);
503 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
506 if (semantics != rhs.semantics ||
507 category != rhs.category ||
510 if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble &&
513 if (category==fcZero || category==fcInfinity)
515 else if (category==fcNormal && exponent!=rhs.exponent)
517 else if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble &&
518 exponent2!=rhs.exponent2)
522 const integerPart* p=significandParts();
523 const integerPart* q=rhs.significandParts();
524 for (; i>0; i--, p++, q++) {
532 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
534 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
535 "Compile-time arithmetic on PPC long double not supported yet");
536 initialize(&ourSemantics);
539 exponent = ourSemantics.precision - 1;
540 significandParts()[0] = value;
541 normalize(rmNearestTiesToEven, lfExactlyZero);
544 APFloat::APFloat(const fltSemantics &ourSemantics,
545 fltCategory ourCategory, bool negative)
547 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
548 "Compile-time arithmetic on PPC long double not supported yet");
549 initialize(&ourSemantics);
550 category = ourCategory;
552 if(category == fcNormal)
556 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
558 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
559 "Compile-time arithmetic on PPC long double not supported yet");
560 initialize(&ourSemantics);
561 convertFromString(text, rmNearestTiesToEven);
564 APFloat::APFloat(const APFloat &rhs)
566 initialize(rhs.semantics);
576 APFloat::partCount() const
578 return partCountForBits(semantics->precision + 1);
582 APFloat::semanticsPrecision(const fltSemantics &semantics)
584 return semantics.precision;
588 APFloat::significandParts() const
590 return const_cast<APFloat *>(this)->significandParts();
594 APFloat::significandParts()
596 assert(category == fcNormal || category == fcNaN);
599 return significand.parts;
601 return &significand.part;
605 APFloat::zeroSignificand()
608 APInt::tcSet(significandParts(), 0, partCount());
611 /* Increment an fcNormal floating point number's significand. */
613 APFloat::incrementSignificand()
617 carry = APInt::tcIncrement(significandParts(), partCount());
619 /* Our callers should never cause us to overflow. */
623 /* Add the significand of the RHS. Returns the carry flag. */
625 APFloat::addSignificand(const APFloat &rhs)
629 parts = significandParts();
631 assert(semantics == rhs.semantics);
632 assert(exponent == rhs.exponent);
634 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
637 /* Subtract the significand of the RHS with a borrow flag. Returns
640 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
644 parts = significandParts();
646 assert(semantics == rhs.semantics);
647 assert(exponent == rhs.exponent);
649 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
653 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
654 on to the full-precision result of the multiplication. Returns the
657 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
659 unsigned int omsb; // One, not zero, based MSB.
660 unsigned int partsCount, newPartsCount, precision;
661 integerPart *lhsSignificand;
662 integerPart scratch[4];
663 integerPart *fullSignificand;
664 lostFraction lost_fraction;
666 assert(semantics == rhs.semantics);
668 precision = semantics->precision;
669 newPartsCount = partCountForBits(precision * 2);
671 if(newPartsCount > 4)
672 fullSignificand = new integerPart[newPartsCount];
674 fullSignificand = scratch;
676 lhsSignificand = significandParts();
677 partsCount = partCount();
679 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
680 rhs.significandParts(), partsCount, partsCount);
682 lost_fraction = lfExactlyZero;
683 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
684 exponent += rhs.exponent;
687 Significand savedSignificand = significand;
688 const fltSemantics *savedSemantics = semantics;
689 fltSemantics extendedSemantics;
691 unsigned int extendedPrecision;
693 /* Normalize our MSB. */
694 extendedPrecision = precision + precision - 1;
695 if(omsb != extendedPrecision)
697 APInt::tcShiftLeft(fullSignificand, newPartsCount,
698 extendedPrecision - omsb);
699 exponent -= extendedPrecision - omsb;
702 /* Create new semantics. */
703 extendedSemantics = *semantics;
704 extendedSemantics.precision = extendedPrecision;
706 if(newPartsCount == 1)
707 significand.part = fullSignificand[0];
709 significand.parts = fullSignificand;
710 semantics = &extendedSemantics;
712 APFloat extendedAddend(*addend);
713 status = extendedAddend.convert(extendedSemantics, rmTowardZero);
714 assert(status == opOK);
715 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
717 /* Restore our state. */
718 if(newPartsCount == 1)
719 fullSignificand[0] = significand.part;
720 significand = savedSignificand;
721 semantics = savedSemantics;
723 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
726 exponent -= (precision - 1);
728 if(omsb > precision) {
729 unsigned int bits, significantParts;
732 bits = omsb - precision;
733 significantParts = partCountForBits(omsb);
734 lf = shiftRight(fullSignificand, significantParts, bits);
735 lost_fraction = combineLostFractions(lf, lost_fraction);
739 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
741 if(newPartsCount > 4)
742 delete [] fullSignificand;
744 return lost_fraction;
747 /* Multiply the significands of LHS and RHS to DST. */
749 APFloat::divideSignificand(const APFloat &rhs)
751 unsigned int bit, i, partsCount;
752 const integerPart *rhsSignificand;
753 integerPart *lhsSignificand, *dividend, *divisor;
754 integerPart scratch[4];
755 lostFraction lost_fraction;
757 assert(semantics == rhs.semantics);
759 lhsSignificand = significandParts();
760 rhsSignificand = rhs.significandParts();
761 partsCount = partCount();
764 dividend = new integerPart[partsCount * 2];
768 divisor = dividend + partsCount;
770 /* Copy the dividend and divisor as they will be modified in-place. */
771 for(i = 0; i < partsCount; i++) {
772 dividend[i] = lhsSignificand[i];
773 divisor[i] = rhsSignificand[i];
774 lhsSignificand[i] = 0;
777 exponent -= rhs.exponent;
779 unsigned int precision = semantics->precision;
781 /* Normalize the divisor. */
782 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
785 APInt::tcShiftLeft(divisor, partsCount, bit);
788 /* Normalize the dividend. */
789 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
792 APInt::tcShiftLeft(dividend, partsCount, bit);
795 /* Ensure the dividend >= divisor initially for the loop below.
796 Incidentally, this means that the division loop below is
797 guaranteed to set the integer bit to one. */
798 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
800 APInt::tcShiftLeft(dividend, partsCount, 1);
801 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
805 for(bit = precision; bit; bit -= 1) {
806 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
807 APInt::tcSubtract(dividend, divisor, 0, partsCount);
808 APInt::tcSetBit(lhsSignificand, bit - 1);
811 APInt::tcShiftLeft(dividend, partsCount, 1);
814 /* Figure out the lost fraction. */
815 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
818 lost_fraction = lfMoreThanHalf;
820 lost_fraction = lfExactlyHalf;
821 else if(APInt::tcIsZero(dividend, partsCount))
822 lost_fraction = lfExactlyZero;
824 lost_fraction = lfLessThanHalf;
829 return lost_fraction;
833 APFloat::significandMSB() const
835 return APInt::tcMSB(significandParts(), partCount());
839 APFloat::significandLSB() const
841 return APInt::tcLSB(significandParts(), partCount());
844 /* Note that a zero result is NOT normalized to fcZero. */
846 APFloat::shiftSignificandRight(unsigned int bits)
848 /* Our exponent should not overflow. */
849 assert((exponent_t) (exponent + bits) >= exponent);
853 return shiftRight(significandParts(), partCount(), bits);
856 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
858 APFloat::shiftSignificandLeft(unsigned int bits)
860 assert(bits < semantics->precision);
863 unsigned int partsCount = partCount();
865 APInt::tcShiftLeft(significandParts(), partsCount, bits);
868 assert(!APInt::tcIsZero(significandParts(), partsCount));
873 APFloat::compareAbsoluteValue(const APFloat &rhs) const
877 assert(semantics == rhs.semantics);
878 assert(category == fcNormal);
879 assert(rhs.category == fcNormal);
881 compare = exponent - rhs.exponent;
883 /* If exponents are equal, do an unsigned bignum comparison of the
886 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
890 return cmpGreaterThan;
897 /* Handle overflow. Sign is preserved. We either become infinity or
898 the largest finite number. */
900 APFloat::handleOverflow(roundingMode rounding_mode)
903 if(rounding_mode == rmNearestTiesToEven
904 || rounding_mode == rmNearestTiesToAway
905 || (rounding_mode == rmTowardPositive && !sign)
906 || (rounding_mode == rmTowardNegative && sign))
908 category = fcInfinity;
909 return (opStatus) (opOverflow | opInexact);
912 /* Otherwise we become the largest finite number. */
914 exponent = semantics->maxExponent;
915 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
916 semantics->precision);
921 /* Returns TRUE if, when truncating the current number, with BIT the
922 new LSB, with the given lost fraction and rounding mode, the result
923 would need to be rounded away from zero (i.e., by increasing the
924 signficand). This routine must work for fcZero of both signs, and
927 APFloat::roundAwayFromZero(roundingMode rounding_mode,
928 lostFraction lost_fraction,
929 unsigned int bit) const
931 /* NaNs and infinities should not have lost fractions. */
932 assert(category == fcNormal || category == fcZero);
934 /* Current callers never pass this so we don't handle it. */
935 assert(lost_fraction != lfExactlyZero);
937 switch(rounding_mode) {
941 case rmNearestTiesToAway:
942 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
944 case rmNearestTiesToEven:
945 if(lost_fraction == lfMoreThanHalf)
948 /* Our zeroes don't have a significand to test. */
949 if(lost_fraction == lfExactlyHalf && category != fcZero)
950 return APInt::tcExtractBit(significandParts(), bit);
957 case rmTowardPositive:
958 return sign == false;
960 case rmTowardNegative:
966 APFloat::normalize(roundingMode rounding_mode,
967 lostFraction lost_fraction)
969 unsigned int omsb; /* One, not zero, based MSB. */
972 if(category != fcNormal)
975 /* Before rounding normalize the exponent of fcNormal numbers. */
976 omsb = significandMSB() + 1;
979 /* OMSB is numbered from 1. We want to place it in the integer
980 bit numbered PRECISON if possible, with a compensating change in
982 exponentChange = omsb - semantics->precision;
984 /* If the resulting exponent is too high, overflow according to
985 the rounding mode. */
986 if(exponent + exponentChange > semantics->maxExponent)
987 return handleOverflow(rounding_mode);
989 /* Subnormal numbers have exponent minExponent, and their MSB
990 is forced based on that. */
991 if(exponent + exponentChange < semantics->minExponent)
992 exponentChange = semantics->minExponent - exponent;
994 /* Shifting left is easy as we don't lose precision. */
995 if(exponentChange < 0) {
996 assert(lost_fraction == lfExactlyZero);
998 shiftSignificandLeft(-exponentChange);
1003 if(exponentChange > 0) {
1006 /* Shift right and capture any new lost fraction. */
1007 lf = shiftSignificandRight(exponentChange);
1009 lost_fraction = combineLostFractions(lf, lost_fraction);
1011 /* Keep OMSB up-to-date. */
1012 if(omsb > (unsigned) exponentChange)
1013 omsb -= exponentChange;
1019 /* Now round the number according to rounding_mode given the lost
1022 /* As specified in IEEE 754, since we do not trap we do not report
1023 underflow for exact results. */
1024 if(lost_fraction == lfExactlyZero) {
1025 /* Canonicalize zeroes. */
1032 /* Increment the significand if we're rounding away from zero. */
1033 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1035 exponent = semantics->minExponent;
1037 incrementSignificand();
1038 omsb = significandMSB() + 1;
1040 /* Did the significand increment overflow? */
1041 if(omsb == (unsigned) semantics->precision + 1) {
1042 /* Renormalize by incrementing the exponent and shifting our
1043 significand right one. However if we already have the
1044 maximum exponent we overflow to infinity. */
1045 if(exponent == semantics->maxExponent) {
1046 category = fcInfinity;
1048 return (opStatus) (opOverflow | opInexact);
1051 shiftSignificandRight(1);
1057 /* The normal case - we were and are not denormal, and any
1058 significand increment above didn't overflow. */
1059 if(omsb == semantics->precision)
1062 /* We have a non-zero denormal. */
1063 assert(omsb < semantics->precision);
1065 /* Canonicalize zeroes. */
1069 /* The fcZero case is a denormal that underflowed to zero. */
1070 return (opStatus) (opUnderflow | opInexact);
1074 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1076 switch(convolve(category, rhs.category)) {
1080 case convolve(fcNaN, fcZero):
1081 case convolve(fcNaN, fcNormal):
1082 case convolve(fcNaN, fcInfinity):
1083 case convolve(fcNaN, fcNaN):
1084 case convolve(fcNormal, fcZero):
1085 case convolve(fcInfinity, fcNormal):
1086 case convolve(fcInfinity, fcZero):
1089 case convolve(fcZero, fcNaN):
1090 case convolve(fcNormal, fcNaN):
1091 case convolve(fcInfinity, fcNaN):
1093 copySignificand(rhs);
1096 case convolve(fcNormal, fcInfinity):
1097 case convolve(fcZero, fcInfinity):
1098 category = fcInfinity;
1099 sign = rhs.sign ^ subtract;
1102 case convolve(fcZero, fcNormal):
1104 sign = rhs.sign ^ subtract;
1107 case convolve(fcZero, fcZero):
1108 /* Sign depends on rounding mode; handled by caller. */
1111 case convolve(fcInfinity, fcInfinity):
1112 /* Differently signed infinities can only be validly
1114 if(sign ^ rhs.sign != subtract) {
1116 // Arbitrary but deterministic value for significand
1117 APInt::tcSet(significandParts(), ~0U, partCount());
1123 case convolve(fcNormal, fcNormal):
1128 /* Add or subtract two normal numbers. */
1130 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1133 lostFraction lost_fraction;
1136 /* Determine if the operation on the absolute values is effectively
1137 an addition or subtraction. */
1138 subtract ^= (sign ^ rhs.sign);
1140 /* Are we bigger exponent-wise than the RHS? */
1141 bits = exponent - rhs.exponent;
1143 /* Subtraction is more subtle than one might naively expect. */
1145 APFloat temp_rhs(rhs);
1149 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1150 lost_fraction = lfExactlyZero;
1151 } else if (bits > 0) {
1152 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1153 shiftSignificandLeft(1);
1156 lost_fraction = shiftSignificandRight(-bits - 1);
1157 temp_rhs.shiftSignificandLeft(1);
1162 carry = temp_rhs.subtractSignificand
1163 (*this, lost_fraction != lfExactlyZero);
1164 copySignificand(temp_rhs);
1167 carry = subtractSignificand
1168 (temp_rhs, lost_fraction != lfExactlyZero);
1171 /* Invert the lost fraction - it was on the RHS and
1173 if(lost_fraction == lfLessThanHalf)
1174 lost_fraction = lfMoreThanHalf;
1175 else if(lost_fraction == lfMoreThanHalf)
1176 lost_fraction = lfLessThanHalf;
1178 /* The code above is intended to ensure that no borrow is
1183 APFloat temp_rhs(rhs);
1185 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1186 carry = addSignificand(temp_rhs);
1188 lost_fraction = shiftSignificandRight(-bits);
1189 carry = addSignificand(rhs);
1192 /* We have a guard bit; generating a carry cannot happen. */
1196 return lost_fraction;
1200 APFloat::multiplySpecials(const APFloat &rhs)
1202 switch(convolve(category, rhs.category)) {
1206 case convolve(fcNaN, fcZero):
1207 case convolve(fcNaN, fcNormal):
1208 case convolve(fcNaN, fcInfinity):
1209 case convolve(fcNaN, fcNaN):
1212 case convolve(fcZero, fcNaN):
1213 case convolve(fcNormal, fcNaN):
1214 case convolve(fcInfinity, fcNaN):
1216 copySignificand(rhs);
1219 case convolve(fcNormal, fcInfinity):
1220 case convolve(fcInfinity, fcNormal):
1221 case convolve(fcInfinity, fcInfinity):
1222 category = fcInfinity;
1225 case convolve(fcZero, fcNormal):
1226 case convolve(fcNormal, fcZero):
1227 case convolve(fcZero, fcZero):
1231 case convolve(fcZero, fcInfinity):
1232 case convolve(fcInfinity, fcZero):
1234 // Arbitrary but deterministic value for significand
1235 APInt::tcSet(significandParts(), ~0U, partCount());
1238 case convolve(fcNormal, fcNormal):
1244 APFloat::divideSpecials(const APFloat &rhs)
1246 switch(convolve(category, rhs.category)) {
1250 case convolve(fcNaN, fcZero):
1251 case convolve(fcNaN, fcNormal):
1252 case convolve(fcNaN, fcInfinity):
1253 case convolve(fcNaN, fcNaN):
1254 case convolve(fcInfinity, fcZero):
1255 case convolve(fcInfinity, fcNormal):
1256 case convolve(fcZero, fcInfinity):
1257 case convolve(fcZero, fcNormal):
1260 case convolve(fcZero, fcNaN):
1261 case convolve(fcNormal, fcNaN):
1262 case convolve(fcInfinity, fcNaN):
1264 copySignificand(rhs);
1267 case convolve(fcNormal, fcInfinity):
1271 case convolve(fcNormal, fcZero):
1272 category = fcInfinity;
1275 case convolve(fcInfinity, fcInfinity):
1276 case convolve(fcZero, fcZero):
1278 // Arbitrary but deterministic value for significand
1279 APInt::tcSet(significandParts(), ~0U, partCount());
1282 case convolve(fcNormal, fcNormal):
1289 APFloat::changeSign()
1291 /* Look mummy, this one's easy. */
1296 APFloat::clearSign()
1298 /* So is this one. */
1303 APFloat::copySign(const APFloat &rhs)
1309 /* Normalized addition or subtraction. */
1311 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1316 fs = addOrSubtractSpecials(rhs, subtract);
1318 /* This return code means it was not a simple case. */
1319 if(fs == opDivByZero) {
1320 lostFraction lost_fraction;
1322 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1323 fs = normalize(rounding_mode, lost_fraction);
1325 /* Can only be zero if we lost no fraction. */
1326 assert(category != fcZero || lost_fraction == lfExactlyZero);
1329 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1330 positive zero unless rounding to minus infinity, except that
1331 adding two like-signed zeroes gives that zero. */
1332 if(category == fcZero) {
1333 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1334 sign = (rounding_mode == rmTowardNegative);
1340 /* Normalized addition. */
1342 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1344 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1345 "Compile-time arithmetic on PPC long double not supported yet");
1346 return addOrSubtract(rhs, rounding_mode, false);
1349 /* Normalized subtraction. */
1351 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1353 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1354 "Compile-time arithmetic on PPC long double not supported yet");
1355 return addOrSubtract(rhs, rounding_mode, true);
1358 /* Normalized multiply. */
1360 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1362 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1363 "Compile-time arithmetic on PPC long double not supported yet");
1367 fs = multiplySpecials(rhs);
1369 if(category == fcNormal) {
1370 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1371 fs = normalize(rounding_mode, lost_fraction);
1372 if(lost_fraction != lfExactlyZero)
1373 fs = (opStatus) (fs | opInexact);
1379 /* Normalized divide. */
1381 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1383 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1384 "Compile-time arithmetic on PPC long double not supported yet");
1388 fs = divideSpecials(rhs);
1390 if(category == fcNormal) {
1391 lostFraction lost_fraction = divideSignificand(rhs);
1392 fs = normalize(rounding_mode, lost_fraction);
1393 if(lost_fraction != lfExactlyZero)
1394 fs = (opStatus) (fs | opInexact);
1400 /* Normalized remainder. This is not currently doing TRT. */
1402 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1404 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1405 "Compile-time arithmetic on PPC long double not supported yet");
1408 unsigned int origSign = sign;
1409 fs = V.divide(rhs, rmNearestTiesToEven);
1410 if (fs == opDivByZero)
1413 int parts = partCount();
1414 integerPart *x = new integerPart[parts];
1415 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1416 rmNearestTiesToEven);
1417 if (fs==opInvalidOp)
1420 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1421 rmNearestTiesToEven);
1422 assert(fs==opOK); // should always work
1424 fs = V.multiply(rhs, rounding_mode);
1425 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1427 fs = subtract(V, rounding_mode);
1428 assert(fs==opOK || fs==opInexact); // likewise
1431 sign = origSign; // IEEE754 requires this
1436 /* Normalized fused-multiply-add. */
1438 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1439 const APFloat &addend,
1440 roundingMode rounding_mode)
1442 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1443 "Compile-time arithmetic on PPC long double not supported yet");
1446 /* Post-multiplication sign, before addition. */
1447 sign ^= multiplicand.sign;
1449 /* If and only if all arguments are normal do we need to do an
1450 extended-precision calculation. */
1451 if(category == fcNormal
1452 && multiplicand.category == fcNormal
1453 && addend.category == fcNormal) {
1454 lostFraction lost_fraction;
1456 lost_fraction = multiplySignificand(multiplicand, &addend);
1457 fs = normalize(rounding_mode, lost_fraction);
1458 if(lost_fraction != lfExactlyZero)
1459 fs = (opStatus) (fs | opInexact);
1461 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1462 positive zero unless rounding to minus infinity, except that
1463 adding two like-signed zeroes gives that zero. */
1464 if(category == fcZero && sign != addend.sign)
1465 sign = (rounding_mode == rmTowardNegative);
1467 fs = multiplySpecials(multiplicand);
1469 /* FS can only be opOK or opInvalidOp. There is no more work
1470 to do in the latter case. The IEEE-754R standard says it is
1471 implementation-defined in this case whether, if ADDEND is a
1472 quiet NaN, we raise invalid op; this implementation does so.
1474 If we need to do the addition we can do so with normal
1477 fs = addOrSubtract(addend, rounding_mode, false);
1483 /* Comparison requires normalized numbers. */
1485 APFloat::compare(const APFloat &rhs) const
1487 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1488 "Compile-time arithmetic on PPC long double not supported yet");
1491 assert(semantics == rhs.semantics);
1493 switch(convolve(category, rhs.category)) {
1497 case convolve(fcNaN, fcZero):
1498 case convolve(fcNaN, fcNormal):
1499 case convolve(fcNaN, fcInfinity):
1500 case convolve(fcNaN, fcNaN):
1501 case convolve(fcZero, fcNaN):
1502 case convolve(fcNormal, fcNaN):
1503 case convolve(fcInfinity, fcNaN):
1504 return cmpUnordered;
1506 case convolve(fcInfinity, fcNormal):
1507 case convolve(fcInfinity, fcZero):
1508 case convolve(fcNormal, fcZero):
1512 return cmpGreaterThan;
1514 case convolve(fcNormal, fcInfinity):
1515 case convolve(fcZero, fcInfinity):
1516 case convolve(fcZero, fcNormal):
1518 return cmpGreaterThan;
1522 case convolve(fcInfinity, fcInfinity):
1523 if(sign == rhs.sign)
1528 return cmpGreaterThan;
1530 case convolve(fcZero, fcZero):
1533 case convolve(fcNormal, fcNormal):
1537 /* Two normal numbers. Do they have the same sign? */
1538 if(sign != rhs.sign) {
1540 result = cmpLessThan;
1542 result = cmpGreaterThan;
1544 /* Compare absolute values; invert result if negative. */
1545 result = compareAbsoluteValue(rhs);
1548 if(result == cmpLessThan)
1549 result = cmpGreaterThan;
1550 else if(result == cmpGreaterThan)
1551 result = cmpLessThan;
1559 APFloat::convert(const fltSemantics &toSemantics,
1560 roundingMode rounding_mode)
1562 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1563 "Compile-time arithmetic on PPC long double not supported yet");
1564 lostFraction lostFraction;
1565 unsigned int newPartCount, oldPartCount;
1568 lostFraction = lfExactlyZero;
1569 newPartCount = partCountForBits(toSemantics.precision + 1);
1570 oldPartCount = partCount();
1572 /* Handle storage complications. If our new form is wider,
1573 re-allocate our bit pattern into wider storage. If it is
1574 narrower, we ignore the excess parts, but if narrowing to a
1575 single part we need to free the old storage.
1576 Be careful not to reference significandParts for zeroes
1577 and infinities, since it aborts. */
1578 if (newPartCount > oldPartCount) {
1579 integerPart *newParts;
1580 newParts = new integerPart[newPartCount];
1581 APInt::tcSet(newParts, 0, newPartCount);
1582 if (category==fcNormal || category==fcNaN)
1583 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1585 significand.parts = newParts;
1586 } else if (newPartCount < oldPartCount) {
1587 /* Capture any lost fraction through truncation of parts so we get
1588 correct rounding whilst normalizing. */
1589 if (category==fcNormal)
1590 lostFraction = lostFractionThroughTruncation
1591 (significandParts(), oldPartCount, toSemantics.precision);
1592 if (newPartCount == 1) {
1593 integerPart newPart = 0;
1594 if (category==fcNormal || category==fcNaN)
1595 newPart = significandParts()[0];
1597 significand.part = newPart;
1601 if(category == fcNormal) {
1602 /* Re-interpret our bit-pattern. */
1603 exponent += toSemantics.precision - semantics->precision;
1604 semantics = &toSemantics;
1605 fs = normalize(rounding_mode, lostFraction);
1606 } else if (category == fcNaN) {
1607 int shift = toSemantics.precision - semantics->precision;
1608 // No normalization here, just truncate
1610 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1612 APInt::tcShiftRight(significandParts(), newPartCount, -shift);
1613 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1614 // does not give you back the same bits. This is dubious, and we
1615 // don't currently do it. You're really supposed to get
1616 // an invalid operation signal at runtime, but nobody does that.
1617 semantics = &toSemantics;
1620 semantics = &toSemantics;
1627 /* Convert a floating point number to an integer according to the
1628 rounding mode. If the rounded integer value is out of range this
1629 returns an invalid operation exception. If the rounded value is in
1630 range but the floating point number is not the exact integer, the C
1631 standard doesn't require an inexact exception to be raised. IEEE
1632 854 does require it so we do that.
1634 Note that for conversions to integer type the C standard requires
1635 round-to-zero to always be used. */
1637 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1639 roundingMode rounding_mode) const
1641 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1642 "Compile-time arithmetic on PPC long double not supported yet");
1643 lostFraction lost_fraction;
1644 unsigned int msb, partsCount;
1647 partsCount = partCountForBits(width);
1649 /* Handle the three special cases first. We produce
1650 a deterministic result even for the Invalid cases. */
1651 if (category == fcNaN) {
1652 // Neither sign nor isSigned affects this.
1653 APInt::tcSet(parts, 0, partsCount);
1656 if (category == fcInfinity) {
1657 if (!sign && isSigned)
1658 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1659 else if (!sign && !isSigned)
1660 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1661 else if (sign && isSigned) {
1662 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1663 APInt::tcShiftLeft(parts, partsCount, width-1);
1664 } else // sign && !isSigned
1665 APInt::tcSet(parts, 0, partsCount);
1668 if (category == fcZero) {
1669 APInt::tcSet(parts, 0, partsCount);
1673 /* Shift the bit pattern so the fraction is lost. */
1676 bits = (int) semantics->precision - 1 - exponent;
1679 lost_fraction = tmp.shiftSignificandRight(bits);
1681 if (-bits >= semantics->precision) {
1682 // Unrepresentably large.
1683 if (!sign && isSigned)
1684 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1685 else if (!sign && !isSigned)
1686 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1687 else if (sign && isSigned) {
1688 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1689 APInt::tcShiftLeft(parts, partsCount, width-1);
1690 } else // sign && !isSigned
1691 APInt::tcSet(parts, 0, partsCount);
1692 return (opStatus)(opOverflow | opInexact);
1694 tmp.shiftSignificandLeft(-bits);
1695 lost_fraction = lfExactlyZero;
1698 if(lost_fraction != lfExactlyZero
1699 && tmp.roundAwayFromZero(rounding_mode, lost_fraction, 0))
1700 tmp.incrementSignificand();
1702 msb = tmp.significandMSB();
1704 /* Negative numbers cannot be represented as unsigned. */
1705 if(!isSigned && tmp.sign && msb != -1U)
1708 /* It takes exponent + 1 bits to represent the truncated floating
1709 point number without its sign. We lose a bit for the sign, but
1710 the maximally negative integer is a special case. */
1711 if(msb + 1 > width) /* !! Not same as msb >= width !! */
1714 if(isSigned && msb + 1 == width
1715 && (!tmp.sign || tmp.significandLSB() != msb))
1718 APInt::tcAssign(parts, tmp.significandParts(), partsCount);
1721 APInt::tcNegate(parts, partsCount);
1723 if(lost_fraction == lfExactlyZero)
1729 /* Convert an unsigned integer SRC to a floating point number,
1730 rounding according to ROUNDING_MODE. The sign of the floating
1731 point number is not modified. */
1733 APFloat::convertFromUnsignedParts(const integerPart *src,
1734 unsigned int srcCount,
1735 roundingMode rounding_mode)
1737 unsigned int omsb, precision, dstCount;
1739 lostFraction lost_fraction;
1741 category = fcNormal;
1742 omsb = APInt::tcMSB(src, srcCount) + 1;
1743 dst = significandParts();
1744 dstCount = partCount();
1745 precision = semantics->precision;
1747 /* We want the most significant PRECISON bits of SRC. There may not
1748 be that many; extract what we can. */
1749 if (precision <= omsb) {
1750 exponent = omsb - 1;
1751 lost_fraction = lostFractionThroughTruncation(src, srcCount,
1753 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
1755 exponent = precision - 1;
1756 lost_fraction = lfExactlyZero;
1757 APInt::tcExtract(dst, dstCount, src, omsb, 0);
1760 return normalize(rounding_mode, lost_fraction);
1763 /* Convert a two's complement integer SRC to a floating point number,
1764 rounding according to ROUNDING_MODE. ISSIGNED is true if the
1765 integer is signed, in which case it must be sign-extended. */
1767 APFloat::convertFromSignExtendedInteger(const integerPart *src,
1768 unsigned int srcCount,
1770 roundingMode rounding_mode)
1772 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1773 "Compile-time arithmetic on PPC long double not supported yet");
1777 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
1780 /* If we're signed and negative negate a copy. */
1782 copy = new integerPart[srcCount];
1783 APInt::tcAssign(copy, src, srcCount);
1784 APInt::tcNegate(copy, srcCount);
1785 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
1789 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
1795 /* FIXME: should this just take a const APInt reference? */
1797 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
1798 unsigned int width, bool isSigned,
1799 roundingMode rounding_mode)
1801 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1802 "Compile-time arithmetic on PPC long double not supported yet");
1803 unsigned int partCount = partCountForBits(width);
1804 APInt api = APInt(width, partCount, parts);
1807 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1812 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
1816 APFloat::convertFromHexadecimalString(const char *p,
1817 roundingMode rounding_mode)
1819 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1820 "Compile-time arithmetic on PPC long double not supported yet");
1821 lostFraction lost_fraction;
1822 integerPart *significand;
1823 unsigned int bitPos, partsCount;
1824 const char *dot, *firstSignificantDigit;
1828 category = fcNormal;
1830 significand = significandParts();
1831 partsCount = partCount();
1832 bitPos = partsCount * integerPartWidth;
1834 /* Skip leading zeroes and any (hexa)decimal point. */
1835 p = skipLeadingZeroesAndAnyDot(p, &dot);
1836 firstSignificantDigit = p;
1839 integerPart hex_value;
1846 hex_value = hexDigitValue(*p);
1847 if(hex_value == -1U) {
1848 lost_fraction = lfExactlyZero;
1854 /* Store the number whilst 4-bit nibbles remain. */
1857 hex_value <<= bitPos % integerPartWidth;
1858 significand[bitPos / integerPartWidth] |= hex_value;
1860 lost_fraction = trailingHexadecimalFraction(p, hex_value);
1861 while(hexDigitValue(*p) != -1U)
1867 /* Hex floats require an exponent but not a hexadecimal point. */
1868 assert(*p == 'p' || *p == 'P');
1870 /* Ignore the exponent if we are zero. */
1871 if(p != firstSignificantDigit) {
1874 /* Implicit hexadecimal point? */
1878 /* Calculate the exponent adjustment implicit in the number of
1879 significant digits. */
1880 expAdjustment = dot - firstSignificantDigit;
1881 if(expAdjustment < 0)
1883 expAdjustment = expAdjustment * 4 - 1;
1885 /* Adjust for writing the significand starting at the most
1886 significant nibble. */
1887 expAdjustment += semantics->precision;
1888 expAdjustment -= partsCount * integerPartWidth;
1890 /* Adjust for the given exponent. */
1891 exponent = totalExponent(p, expAdjustment);
1894 return normalize(rounding_mode, lost_fraction);
1898 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
1899 unsigned sigPartCount, int exp,
1900 roundingMode rounding_mode)
1902 unsigned int parts, pow5PartCount;
1903 fltSemantics calcSemantics = { 32767, -32767, 0 };
1904 integerPart pow5Parts[maxPowerOfFiveParts];
1907 isNearest = (rounding_mode == rmNearestTiesToEven
1908 || rounding_mode == rmNearestTiesToAway);
1910 parts = partCountForBits(semantics->precision + 11);
1912 /* Calculate pow(5, abs(exp)). */
1913 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
1915 for (;; parts *= 2) {
1916 opStatus sigStatus, powStatus;
1917 unsigned int excessPrecision, truncatedBits;
1919 calcSemantics.precision = parts * integerPartWidth - 1;
1920 excessPrecision = calcSemantics.precision - semantics->precision;
1921 truncatedBits = excessPrecision;
1923 APFloat decSig(calcSemantics, fcZero, sign);
1924 APFloat pow5(calcSemantics, fcZero, false);
1926 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
1927 rmNearestTiesToEven);
1928 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
1929 rmNearestTiesToEven);
1930 /* Add exp, as 10^n = 5^n * 2^n. */
1931 decSig.exponent += exp;
1933 lostFraction calcLostFraction;
1934 integerPart HUerr, HUdistance, powHUerr;
1937 /* multiplySignificand leaves the precision-th bit set to 1. */
1938 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
1939 powHUerr = powStatus != opOK;
1941 calcLostFraction = decSig.divideSignificand(pow5);
1942 /* Denormal numbers have less precision. */
1943 if (decSig.exponent < semantics->minExponent) {
1944 excessPrecision += (semantics->minExponent - decSig.exponent);
1945 truncatedBits = excessPrecision;
1946 if (excessPrecision > calcSemantics.precision)
1947 excessPrecision = calcSemantics.precision;
1949 /* Extra half-ulp lost in reciprocal of exponent. */
1950 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0: 2;
1953 /* Both multiplySignificand and divideSignificand return the
1954 result with the integer bit set. */
1955 assert (APInt::tcExtractBit
1956 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
1958 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
1960 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
1961 excessPrecision, isNearest);
1963 /* Are we guaranteed to round correctly if we truncate? */
1964 if (HUdistance >= HUerr) {
1965 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
1966 calcSemantics.precision - excessPrecision,
1968 /* Take the exponent of decSig. If we tcExtract-ed less bits
1969 above we must adjust our exponent to compensate for the
1970 implicit right shift. */
1971 exponent = (decSig.exponent + semantics->precision
1972 - (calcSemantics.precision - excessPrecision));
1973 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
1976 return normalize(rounding_mode, calcLostFraction);
1982 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
1984 const char *dot, *firstSignificantDigit;
1985 integerPart val, maxVal, decValue;
1988 /* Skip leading zeroes and any decimal point. */
1989 p = skipLeadingZeroesAndAnyDot(p, &dot);
1990 firstSignificantDigit = p;
1992 /* The maximum number that can be multiplied by ten with any digit
1993 added without overflowing an integerPart. */
1994 maxVal = (~ (integerPart) 0 - 9) / 10;
1997 while (val <= maxVal) {
2003 decValue = digitValue(*p);
2004 if (decValue == -1U)
2007 val = val * 10 + decValue;
2010 integerPart *decSignificand;
2011 unsigned int partCount, maxPartCount;
2015 decSignificand = new integerPart[maxPartCount];
2016 decSignificand[partCount++] = val;
2018 /* Now continue to do single-part arithmetic for as long as we can.
2019 Then do a part multiplication, and repeat. */
2020 while (decValue != -1U) {
2021 integerPart multiplier;
2026 while (multiplier <= maxVal) {
2032 decValue = digitValue(*p);
2033 if (decValue == -1U)
2037 val = val * 10 + decValue;
2040 if (partCount == maxPartCount) {
2041 integerPart *newDecSignificand;
2042 newDecSignificand = new integerPart[maxPartCount = partCount * 2];
2043 APInt::tcAssign(newDecSignificand, decSignificand, partCount);
2044 delete [] decSignificand;
2045 decSignificand = newDecSignificand;
2048 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2049 partCount, partCount + 1, false);
2051 /* If we used another part (likely), increase the count. */
2052 if (decSignificand[partCount] != 0)
2056 /* Now decSignificand contains the supplied significand ignoring the
2057 decimal point. Figure out our effective exponent, which is the
2058 specified exponent adjusted for any decimal point. */
2060 if (p == firstSignificantDigit) {
2061 /* Ignore the exponent if we are zero - we cannot overflow. */
2065 int decimalExponent;
2068 decimalExponent = dot + 1 - p;
2070 decimalExponent = 0;
2072 /* Add the given exponent. */
2073 if (*p == 'e' || *p == 'E')
2074 decimalExponent = totalExponent(p, decimalExponent);
2076 category = fcNormal;
2077 fs = roundSignificandWithExponent(decSignificand, partCount,
2078 decimalExponent, rounding_mode);
2081 delete [] decSignificand;
2087 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2089 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
2090 "Compile-time arithmetic on PPC long double not supported yet");
2091 /* Handle a leading minus sign. */
2097 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2098 return convertFromHexadecimalString(p + 2, rounding_mode);
2100 return convertFromDecimalString(p, rounding_mode);
2103 /* Write out a hexadecimal representation of the floating point value
2104 to DST, which must be of sufficient size, in the C99 form
2105 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2106 excluding the terminating NUL.
2108 If UPPERCASE, the output is in upper case, otherwise in lower case.
2110 HEXDIGITS digits appear altogether, rounding the value if
2111 necessary. If HEXDIGITS is 0, the minimal precision to display the
2112 number precisely is used instead. If nothing would appear after
2113 the decimal point it is suppressed.
2115 The decimal exponent is always printed and has at least one digit.
2116 Zero values display an exponent of zero. Infinities and NaNs
2117 appear as "infinity" or "nan" respectively.
2119 The above rules are as specified by C99. There is ambiguity about
2120 what the leading hexadecimal digit should be. This implementation
2121 uses whatever is necessary so that the exponent is displayed as
2122 stored. This implies the exponent will fall within the IEEE format
2123 range, and the leading hexadecimal digit will be 0 (for denormals),
2124 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2125 any other digits zero).
2128 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2129 bool upperCase, roundingMode rounding_mode) const
2131 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
2132 "Compile-time arithmetic on PPC long double not supported yet");
2141 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2142 dst += sizeof infinityL - 1;
2146 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2147 dst += sizeof NaNU - 1;
2152 *dst++ = upperCase ? 'X': 'x';
2154 if (hexDigits > 1) {
2156 memset (dst, '0', hexDigits - 1);
2157 dst += hexDigits - 1;
2159 *dst++ = upperCase ? 'P': 'p';
2164 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2173 /* Does the hard work of outputting the correctly rounded hexadecimal
2174 form of a normal floating point number with the specified number of
2175 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2176 digits necessary to print the value precisely is output. */
2178 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2180 roundingMode rounding_mode) const
2182 unsigned int count, valueBits, shift, partsCount, outputDigits;
2183 const char *hexDigitChars;
2184 const integerPart *significand;
2189 *dst++ = upperCase ? 'X': 'x';
2192 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2194 significand = significandParts();
2195 partsCount = partCount();
2197 /* +3 because the first digit only uses the single integer bit, so
2198 we have 3 virtual zero most-significant-bits. */
2199 valueBits = semantics->precision + 3;
2200 shift = integerPartWidth - valueBits % integerPartWidth;
2202 /* The natural number of digits required ignoring trailing
2203 insignificant zeroes. */
2204 outputDigits = (valueBits - significandLSB () + 3) / 4;
2206 /* hexDigits of zero means use the required number for the
2207 precision. Otherwise, see if we are truncating. If we are,
2208 find out if we need to round away from zero. */
2210 if (hexDigits < outputDigits) {
2211 /* We are dropping non-zero bits, so need to check how to round.
2212 "bits" is the number of dropped bits. */
2214 lostFraction fraction;
2216 bits = valueBits - hexDigits * 4;
2217 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2218 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2220 outputDigits = hexDigits;
2223 /* Write the digits consecutively, and start writing in the location
2224 of the hexadecimal point. We move the most significant digit
2225 left and add the hexadecimal point later. */
2228 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2230 while (outputDigits && count) {
2233 /* Put the most significant integerPartWidth bits in "part". */
2234 if (--count == partsCount)
2235 part = 0; /* An imaginary higher zero part. */
2237 part = significand[count] << shift;
2240 part |= significand[count - 1] >> (integerPartWidth - shift);
2242 /* Convert as much of "part" to hexdigits as we can. */
2243 unsigned int curDigits = integerPartWidth / 4;
2245 if (curDigits > outputDigits)
2246 curDigits = outputDigits;
2247 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2248 outputDigits -= curDigits;
2254 /* Note that hexDigitChars has a trailing '0'. */
2257 *q = hexDigitChars[hexDigitValue (*q) + 1];
2258 } while (*q == '0');
2261 /* Add trailing zeroes. */
2262 memset (dst, '0', outputDigits);
2263 dst += outputDigits;
2266 /* Move the most significant digit to before the point, and if there
2267 is something after the decimal point add it. This must come
2268 after rounding above. */
2275 /* Finally output the exponent. */
2276 *dst++ = upperCase ? 'P': 'p';
2278 return writeSignedDecimal (dst, exponent);
2281 // For good performance it is desirable for different APFloats
2282 // to produce different integers.
2284 APFloat::getHashValue() const
2286 if (category==fcZero) return sign<<8 | semantics->precision ;
2287 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2288 else if (category==fcNaN) return 1<<10 | semantics->precision;
2290 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2291 const integerPart* p = significandParts();
2292 for (int i=partCount(); i>0; i--, p++)
2293 hash ^= ((uint32_t)*p) ^ (*p)>>32;
2298 // Conversion from APFloat to/from host float/double. It may eventually be
2299 // possible to eliminate these and have everybody deal with APFloats, but that
2300 // will take a while. This approach will not easily extend to long double.
2301 // Current implementation requires integerPartWidth==64, which is correct at
2302 // the moment but could be made more general.
2304 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2305 // the actual IEEE respresentations. We compensate for that here.
2308 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2310 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended);
2311 assert (partCount()==2);
2313 uint64_t myexponent, mysignificand;
2315 if (category==fcNormal) {
2316 myexponent = exponent+16383; //bias
2317 mysignificand = significandParts()[0];
2318 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2319 myexponent = 0; // denormal
2320 } else if (category==fcZero) {
2323 } else if (category==fcInfinity) {
2324 myexponent = 0x7fff;
2325 mysignificand = 0x8000000000000000ULL;
2327 assert(category == fcNaN && "Unknown category");
2328 myexponent = 0x7fff;
2329 mysignificand = significandParts()[0];
2333 words[0] = (((uint64_t)sign & 1) << 63) |
2334 ((myexponent & 0x7fff) << 48) |
2335 ((mysignificand >>16) & 0xffffffffffffLL);
2336 words[1] = mysignificand & 0xffff;
2337 return APInt(80, 2, words);
2341 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2343 assert(semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble);
2344 assert (partCount()==2);
2346 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2348 if (category==fcNormal) {
2349 myexponent = exponent + 1023; //bias
2350 myexponent2 = exponent2 + 1023;
2351 mysignificand = significandParts()[0];
2352 mysignificand2 = significandParts()[1];
2353 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2354 myexponent = 0; // denormal
2355 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2356 myexponent2 = 0; // denormal
2357 } else if (category==fcZero) {
2362 } else if (category==fcInfinity) {
2368 assert(category == fcNaN && "Unknown category");
2370 mysignificand = significandParts()[0];
2371 myexponent2 = exponent2;
2372 mysignificand2 = significandParts()[1];
2376 words[0] = (((uint64_t)sign & 1) << 63) |
2377 ((myexponent & 0x7ff) << 52) |
2378 (mysignificand & 0xfffffffffffffLL);
2379 words[1] = (((uint64_t)sign2 & 1) << 63) |
2380 ((myexponent2 & 0x7ff) << 52) |
2381 (mysignificand2 & 0xfffffffffffffLL);
2382 return APInt(128, 2, words);
2386 APFloat::convertDoubleAPFloatToAPInt() const
2388 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2389 assert (partCount()==1);
2391 uint64_t myexponent, mysignificand;
2393 if (category==fcNormal) {
2394 myexponent = exponent+1023; //bias
2395 mysignificand = *significandParts();
2396 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2397 myexponent = 0; // denormal
2398 } else if (category==fcZero) {
2401 } else if (category==fcInfinity) {
2405 assert(category == fcNaN && "Unknown category!");
2407 mysignificand = *significandParts();
2410 return APInt(64, (((((uint64_t)sign & 1) << 63) |
2411 ((myexponent & 0x7ff) << 52) |
2412 (mysignificand & 0xfffffffffffffLL))));
2416 APFloat::convertFloatAPFloatToAPInt() const
2418 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2419 assert (partCount()==1);
2421 uint32_t myexponent, mysignificand;
2423 if (category==fcNormal) {
2424 myexponent = exponent+127; //bias
2425 mysignificand = *significandParts();
2426 if (myexponent == 1 && !(mysignificand & 0x400000))
2427 myexponent = 0; // denormal
2428 } else if (category==fcZero) {
2431 } else if (category==fcInfinity) {
2435 assert(category == fcNaN && "Unknown category!");
2437 mysignificand = *significandParts();
2440 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2441 (mysignificand & 0x7fffff)));
2444 // This function creates an APInt that is just a bit map of the floating
2445 // point constant as it would appear in memory. It is not a conversion,
2446 // and treating the result as a normal integer is unlikely to be useful.
2449 APFloat::convertToAPInt() const
2451 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle)
2452 return convertFloatAPFloatToAPInt();
2454 if (semantics == (const llvm::fltSemantics* const)&IEEEdouble)
2455 return convertDoubleAPFloatToAPInt();
2457 if (semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble)
2458 return convertPPCDoubleDoubleAPFloatToAPInt();
2460 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended &&
2462 return convertF80LongDoubleAPFloatToAPInt();
2466 APFloat::convertToFloat() const
2468 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle);
2469 APInt api = convertToAPInt();
2470 return api.bitsToFloat();
2474 APFloat::convertToDouble() const
2476 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble);
2477 APInt api = convertToAPInt();
2478 return api.bitsToDouble();
2481 /// Integer bit is explicit in this format. Current Intel book does not
2482 /// define meaning of:
2483 /// exponent = all 1's, integer bit not set.
2484 /// exponent = 0, integer bit set. (formerly "psuedodenormals")
2485 /// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals")
2487 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2489 assert(api.getBitWidth()==80);
2490 uint64_t i1 = api.getRawData()[0];
2491 uint64_t i2 = api.getRawData()[1];
2492 uint64_t myexponent = (i1 >> 48) & 0x7fff;
2493 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
2496 initialize(&APFloat::x87DoubleExtended);
2497 assert(partCount()==2);
2500 if (myexponent==0 && mysignificand==0) {
2501 // exponent, significand meaningless
2503 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2504 // exponent, significand meaningless
2505 category = fcInfinity;
2506 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2507 // exponent meaningless
2509 significandParts()[0] = mysignificand;
2510 significandParts()[1] = 0;
2512 category = fcNormal;
2513 exponent = myexponent - 16383;
2514 significandParts()[0] = mysignificand;
2515 significandParts()[1] = 0;
2516 if (myexponent==0) // denormal
2522 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2524 assert(api.getBitWidth()==128);
2525 uint64_t i1 = api.getRawData()[0];
2526 uint64_t i2 = api.getRawData()[1];
2527 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2528 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2529 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2530 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2532 initialize(&APFloat::PPCDoubleDouble);
2533 assert(partCount()==2);
2537 if (myexponent==0 && mysignificand==0) {
2538 // exponent, significand meaningless
2539 // exponent2 and significand2 are required to be 0; we don't check
2541 } else if (myexponent==0x7ff && mysignificand==0) {
2542 // exponent, significand meaningless
2543 // exponent2 and significand2 are required to be 0; we don't check
2544 category = fcInfinity;
2545 } else if (myexponent==0x7ff && mysignificand!=0) {
2546 // exponent meaningless. So is the whole second word, but keep it
2549 exponent2 = myexponent2;
2550 significandParts()[0] = mysignificand;
2551 significandParts()[1] = mysignificand2;
2553 category = fcNormal;
2554 // Note there is no category2; the second word is treated as if it is
2555 // fcNormal, although it might be something else considered by itself.
2556 exponent = myexponent - 1023;
2557 exponent2 = myexponent2 - 1023;
2558 significandParts()[0] = mysignificand;
2559 significandParts()[1] = mysignificand2;
2560 if (myexponent==0) // denormal
2563 significandParts()[0] |= 0x10000000000000LL; // integer bit
2567 significandParts()[1] |= 0x10000000000000LL; // integer bit
2572 APFloat::initFromDoubleAPInt(const APInt &api)
2574 assert(api.getBitWidth()==64);
2575 uint64_t i = *api.getRawData();
2576 uint64_t myexponent = (i >> 52) & 0x7ff;
2577 uint64_t mysignificand = i & 0xfffffffffffffLL;
2579 initialize(&APFloat::IEEEdouble);
2580 assert(partCount()==1);
2583 if (myexponent==0 && mysignificand==0) {
2584 // exponent, significand meaningless
2586 } else if (myexponent==0x7ff && mysignificand==0) {
2587 // exponent, significand meaningless
2588 category = fcInfinity;
2589 } else if (myexponent==0x7ff && mysignificand!=0) {
2590 // exponent meaningless
2592 *significandParts() = mysignificand;
2594 category = fcNormal;
2595 exponent = myexponent - 1023;
2596 *significandParts() = mysignificand;
2597 if (myexponent==0) // denormal
2600 *significandParts() |= 0x10000000000000LL; // integer bit
2605 APFloat::initFromFloatAPInt(const APInt & api)
2607 assert(api.getBitWidth()==32);
2608 uint32_t i = (uint32_t)*api.getRawData();
2609 uint32_t myexponent = (i >> 23) & 0xff;
2610 uint32_t mysignificand = i & 0x7fffff;
2612 initialize(&APFloat::IEEEsingle);
2613 assert(partCount()==1);
2616 if (myexponent==0 && mysignificand==0) {
2617 // exponent, significand meaningless
2619 } else if (myexponent==0xff && mysignificand==0) {
2620 // exponent, significand meaningless
2621 category = fcInfinity;
2622 } else if (myexponent==0xff && mysignificand!=0) {
2623 // sign, exponent, significand meaningless
2625 *significandParts() = mysignificand;
2627 category = fcNormal;
2628 exponent = myexponent - 127; //bias
2629 *significandParts() = mysignificand;
2630 if (myexponent==0) // denormal
2633 *significandParts() |= 0x800000; // integer bit
2637 /// Treat api as containing the bits of a floating point number. Currently
2638 /// we infer the floating point type from the size of the APInt. The
2639 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2640 /// when the size is anything else).
2642 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2644 if (api.getBitWidth() == 32)
2645 return initFromFloatAPInt(api);
2646 else if (api.getBitWidth()==64)
2647 return initFromDoubleAPInt(api);
2648 else if (api.getBitWidth()==80)
2649 return initFromF80LongDoubleAPInt(api);
2650 else if (api.getBitWidth()==128 && !isIEEE)
2651 return initFromPPCDoubleDoubleAPInt(api);
2656 APFloat::APFloat(const APInt& api, bool isIEEE)
2658 initFromAPInt(api, isIEEE);
2661 APFloat::APFloat(float f)
2663 APInt api = APInt(32, 0);
2664 initFromAPInt(api.floatToBits(f));
2667 APFloat::APFloat(double d)
2669 APInt api = APInt(64, 0);
2670 initFromAPInt(api.doubleToBits(d));