1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Neil Booth and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
17 #include "llvm/ADT/APFloat.h"
18 #include "llvm/Support/MathExtras.h"
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
30 /* Represents floating point arithmetic semantics. */
32 /* The largest E such that 2^E is representable; this matches the
33 definition of IEEE 754. */
34 exponent_t maxExponent;
36 /* The smallest E such that 2^E is a normalized number; this
37 matches the definition of IEEE 754. */
38 exponent_t minExponent;
40 /* Number of bits in the significand. This includes the integer
42 unsigned int precision;
45 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
46 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
47 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
48 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
49 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
51 // The PowerPC format consists of two doubles. It does not map cleanly
52 // onto the usual format above. For now only storage of constants of
53 // this type is supported, no arithmetic.
54 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106 };
56 /* A tight upper bound on number of parts required to hold the value
59 power * 1024 / (441 * integerPartWidth) + 1
61 However, whilst the result may require only this many parts,
62 because we are multiplying two values to get it, the
63 multiplication may require an extra part with the excess part
64 being zero (consider the trivial case of 1 * 1, tcFullMultiply
65 requires two parts to hold the single-part result). So we add an
66 extra one to guarantee enough space whilst multiplying. */
67 const unsigned int maxExponent = 16383;
68 const unsigned int maxPrecision = 113;
69 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
70 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 1024)
71 / (441 * integerPartWidth));
74 /* Put a bunch of private, handy routines in an anonymous namespace. */
78 partCountForBits(unsigned int bits)
80 return ((bits) + integerPartWidth - 1) / integerPartWidth;
84 digitValue(unsigned int c)
96 hexDigitValue(unsigned int c)
115 /* This is ugly and needs cleaning up, but I don't immediately see
116 how whilst remaining safe. */
118 totalExponent(const char *p, int exponentAdjustment)
120 integerPart unsignedExponent;
121 bool negative, overflow;
124 /* Move past the exponent letter and sign to the digits. */
126 negative = *p == '-';
127 if(*p == '-' || *p == '+')
130 unsignedExponent = 0;
135 value = digitValue(*p);
140 unsignedExponent = unsignedExponent * 10 + value;
141 if(unsignedExponent > 65535)
145 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
149 exponent = unsignedExponent;
151 exponent = -exponent;
152 exponent += exponentAdjustment;
153 if(exponent > 65535 || exponent < -65536)
158 exponent = negative ? -65536: 65535;
164 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
179 /* Return the trailing fraction of a hexadecimal number.
180 DIGITVALUE is the first hex digit of the fraction, P points to
183 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
185 unsigned int hexDigit;
187 /* If the first trailing digit isn't 0 or 8 we can work out the
188 fraction immediately. */
190 return lfMoreThanHalf;
191 else if(digitValue < 8 && digitValue > 0)
192 return lfLessThanHalf;
194 /* Otherwise we need to find the first non-zero digit. */
198 hexDigit = hexDigitValue(*p);
200 /* If we ran off the end it is exactly zero or one-half, otherwise
203 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
205 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
208 /* Return the fraction lost were a bignum truncated losing the least
209 significant BITS bits. */
211 lostFractionThroughTruncation(const integerPart *parts,
212 unsigned int partCount,
217 lsb = APInt::tcLSB(parts, partCount);
219 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
221 return lfExactlyZero;
223 return lfExactlyHalf;
224 if(bits <= partCount * integerPartWidth
225 && APInt::tcExtractBit(parts, bits - 1))
226 return lfMoreThanHalf;
228 return lfLessThanHalf;
231 /* Shift DST right BITS bits noting lost fraction. */
233 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
235 lostFraction lost_fraction;
237 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
239 APInt::tcShiftRight(dst, parts, bits);
241 return lost_fraction;
244 /* Combine the effect of two lost fractions. */
246 combineLostFractions(lostFraction moreSignificant,
247 lostFraction lessSignificant)
249 if(lessSignificant != lfExactlyZero) {
250 if(moreSignificant == lfExactlyZero)
251 moreSignificant = lfLessThanHalf;
252 else if(moreSignificant == lfExactlyHalf)
253 moreSignificant = lfMoreThanHalf;
256 return moreSignificant;
259 /* The error from the true value, in half-ulps, on multiplying two
260 floating point numbers, which differ from the value they
261 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
262 than the returned value.
264 See "How to Read Floating Point Numbers Accurately" by William D
267 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
269 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
271 if (HUerr1 + HUerr2 == 0)
272 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
274 return inexactMultiply + 2 * (HUerr1 + HUerr2);
277 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
278 when the least significant BITS are truncated. BITS cannot be
281 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
283 unsigned int count, partBits;
284 integerPart part, boundary;
289 count = bits / integerPartWidth;
290 partBits = bits % integerPartWidth + 1;
292 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
295 boundary = (integerPart) 1 << (partBits - 1);
300 if (part - boundary <= boundary - part)
301 return part - boundary;
303 return boundary - part;
306 if (part == boundary) {
309 return ~(integerPart) 0; /* A lot. */
312 } else if (part == boundary - 1) {
315 return ~(integerPart) 0; /* A lot. */
320 return ~(integerPart) 0; /* A lot. */
323 /* Place pow(5, power) in DST, and return the number of parts used.
324 DST must be at least one part larger than size of the answer. */
326 powerOf5(integerPart *dst, unsigned int power)
328 /* A tight upper bound on number of parts required to hold the
329 value pow(5, power) is
331 power * 65536 / (28224 * integerPartWidth) + 1
333 However, whilst the result may require only N parts, because we
334 are multiplying two values to get it, the multiplication may
335 require N + 1 parts with the excess part being zero (consider
336 the trivial case of 1 * 1, the multiplier requires two parts to
337 hold the single-part result). So we add two to guarantee
338 enough space whilst multiplying. */
339 static integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
341 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
342 static unsigned int partsCount[16] = { 1 };
344 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
347 assert(power <= maxExponent);
352 *p1 = firstEightPowers[power & 7];
358 for (unsigned int n = 0; power; power >>= 1, n++) {
363 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
365 pc = partsCount[n - 1];
366 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
368 if (pow5[pc - 1] == 0)
376 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
378 if (p2[result - 1] == 0)
381 /* Now result is in p1 with partsCount parts and p2 is scratch
383 tmp = p1, p1 = p2, p2 = tmp;
390 APInt::tcAssign(dst, p1, result);
395 /* Zero at the end to avoid modular arithmetic when adding one; used
396 when rounding up during hexadecimal output. */
397 static const char hexDigitsLower[] = "0123456789abcdef0";
398 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
399 static const char infinityL[] = "infinity";
400 static const char infinityU[] = "INFINITY";
401 static const char NaNL[] = "nan";
402 static const char NaNU[] = "NAN";
404 /* Write out an integerPart in hexadecimal, starting with the most
405 significant nibble. Write out exactly COUNT hexdigits, return
408 partAsHex (char *dst, integerPart part, unsigned int count,
409 const char *hexDigitChars)
411 unsigned int result = count;
413 assert (count != 0 && count <= integerPartWidth / 4);
415 part >>= (integerPartWidth - 4 * count);
417 dst[count] = hexDigitChars[part & 0xf];
424 /* Write out an unsigned decimal integer. */
426 writeUnsignedDecimal (char *dst, unsigned int n)
442 /* Write out a signed decimal integer. */
444 writeSignedDecimal (char *dst, int value)
448 dst = writeUnsignedDecimal(dst, -(unsigned) value);
450 dst = writeUnsignedDecimal(dst, value);
458 APFloat::initialize(const fltSemantics *ourSemantics)
462 semantics = ourSemantics;
465 significand.parts = new integerPart[count];
469 APFloat::freeSignificand()
472 delete [] significand.parts;
476 APFloat::assign(const APFloat &rhs)
478 assert(semantics == rhs.semantics);
481 category = rhs.category;
482 exponent = rhs.exponent;
484 exponent2 = rhs.exponent2;
485 if(category == fcNormal || category == fcNaN)
486 copySignificand(rhs);
490 APFloat::copySignificand(const APFloat &rhs)
492 assert(category == fcNormal || category == fcNaN);
493 assert(rhs.partCount() >= partCount());
495 APInt::tcAssign(significandParts(), rhs.significandParts(),
500 APFloat::operator=(const APFloat &rhs)
503 if(semantics != rhs.semantics) {
505 initialize(rhs.semantics);
514 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
517 if (semantics != rhs.semantics ||
518 category != rhs.category ||
521 if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble &&
524 if (category==fcZero || category==fcInfinity)
526 else if (category==fcNormal && exponent!=rhs.exponent)
528 else if (semantics==(const llvm::fltSemantics* const)&PPCDoubleDouble &&
529 exponent2!=rhs.exponent2)
533 const integerPart* p=significandParts();
534 const integerPart* q=rhs.significandParts();
535 for (; i>0; i--, p++, q++) {
543 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
545 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
546 "Compile-time arithmetic on PPC long double not supported yet");
547 initialize(&ourSemantics);
550 exponent = ourSemantics.precision - 1;
551 significandParts()[0] = value;
552 normalize(rmNearestTiesToEven, lfExactlyZero);
555 APFloat::APFloat(const fltSemantics &ourSemantics,
556 fltCategory ourCategory, bool negative)
558 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
559 "Compile-time arithmetic on PPC long double not supported yet");
560 initialize(&ourSemantics);
561 category = ourCategory;
563 if(category == fcNormal)
567 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
569 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
570 "Compile-time arithmetic on PPC long double not supported yet");
571 initialize(&ourSemantics);
572 convertFromString(text, rmNearestTiesToEven);
575 APFloat::APFloat(const APFloat &rhs)
577 initialize(rhs.semantics);
587 APFloat::partCount() const
589 return partCountForBits(semantics->precision + 1);
593 APFloat::semanticsPrecision(const fltSemantics &semantics)
595 return semantics.precision;
599 APFloat::significandParts() const
601 return const_cast<APFloat *>(this)->significandParts();
605 APFloat::significandParts()
607 assert(category == fcNormal || category == fcNaN);
610 return significand.parts;
612 return &significand.part;
616 APFloat::zeroSignificand()
619 APInt::tcSet(significandParts(), 0, partCount());
622 /* Increment an fcNormal floating point number's significand. */
624 APFloat::incrementSignificand()
628 carry = APInt::tcIncrement(significandParts(), partCount());
630 /* Our callers should never cause us to overflow. */
634 /* Add the significand of the RHS. Returns the carry flag. */
636 APFloat::addSignificand(const APFloat &rhs)
640 parts = significandParts();
642 assert(semantics == rhs.semantics);
643 assert(exponent == rhs.exponent);
645 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
648 /* Subtract the significand of the RHS with a borrow flag. Returns
651 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
655 parts = significandParts();
657 assert(semantics == rhs.semantics);
658 assert(exponent == rhs.exponent);
660 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
664 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
665 on to the full-precision result of the multiplication. Returns the
668 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
670 unsigned int omsb; // One, not zero, based MSB.
671 unsigned int partsCount, newPartsCount, precision;
672 integerPart *lhsSignificand;
673 integerPart scratch[4];
674 integerPart *fullSignificand;
675 lostFraction lost_fraction;
677 assert(semantics == rhs.semantics);
679 precision = semantics->precision;
680 newPartsCount = partCountForBits(precision * 2);
682 if(newPartsCount > 4)
683 fullSignificand = new integerPart[newPartsCount];
685 fullSignificand = scratch;
687 lhsSignificand = significandParts();
688 partsCount = partCount();
690 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
691 rhs.significandParts(), partsCount, partsCount);
693 lost_fraction = lfExactlyZero;
694 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
695 exponent += rhs.exponent;
698 Significand savedSignificand = significand;
699 const fltSemantics *savedSemantics = semantics;
700 fltSemantics extendedSemantics;
702 unsigned int extendedPrecision;
704 /* Normalize our MSB. */
705 extendedPrecision = precision + precision - 1;
706 if(omsb != extendedPrecision)
708 APInt::tcShiftLeft(fullSignificand, newPartsCount,
709 extendedPrecision - omsb);
710 exponent -= extendedPrecision - omsb;
713 /* Create new semantics. */
714 extendedSemantics = *semantics;
715 extendedSemantics.precision = extendedPrecision;
717 if(newPartsCount == 1)
718 significand.part = fullSignificand[0];
720 significand.parts = fullSignificand;
721 semantics = &extendedSemantics;
723 APFloat extendedAddend(*addend);
724 status = extendedAddend.convert(extendedSemantics, rmTowardZero);
725 assert(status == opOK);
726 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
728 /* Restore our state. */
729 if(newPartsCount == 1)
730 fullSignificand[0] = significand.part;
731 significand = savedSignificand;
732 semantics = savedSemantics;
734 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
737 exponent -= (precision - 1);
739 if(omsb > precision) {
740 unsigned int bits, significantParts;
743 bits = omsb - precision;
744 significantParts = partCountForBits(omsb);
745 lf = shiftRight(fullSignificand, significantParts, bits);
746 lost_fraction = combineLostFractions(lf, lost_fraction);
750 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
752 if(newPartsCount > 4)
753 delete [] fullSignificand;
755 return lost_fraction;
758 /* Multiply the significands of LHS and RHS to DST. */
760 APFloat::divideSignificand(const APFloat &rhs)
762 unsigned int bit, i, partsCount;
763 const integerPart *rhsSignificand;
764 integerPart *lhsSignificand, *dividend, *divisor;
765 integerPart scratch[4];
766 lostFraction lost_fraction;
768 assert(semantics == rhs.semantics);
770 lhsSignificand = significandParts();
771 rhsSignificand = rhs.significandParts();
772 partsCount = partCount();
775 dividend = new integerPart[partsCount * 2];
779 divisor = dividend + partsCount;
781 /* Copy the dividend and divisor as they will be modified in-place. */
782 for(i = 0; i < partsCount; i++) {
783 dividend[i] = lhsSignificand[i];
784 divisor[i] = rhsSignificand[i];
785 lhsSignificand[i] = 0;
788 exponent -= rhs.exponent;
790 unsigned int precision = semantics->precision;
792 /* Normalize the divisor. */
793 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
796 APInt::tcShiftLeft(divisor, partsCount, bit);
799 /* Normalize the dividend. */
800 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
803 APInt::tcShiftLeft(dividend, partsCount, bit);
806 /* Ensure the dividend >= divisor initially for the loop below.
807 Incidentally, this means that the division loop below is
808 guaranteed to set the integer bit to one. */
809 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
811 APInt::tcShiftLeft(dividend, partsCount, 1);
812 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
816 for(bit = precision; bit; bit -= 1) {
817 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
818 APInt::tcSubtract(dividend, divisor, 0, partsCount);
819 APInt::tcSetBit(lhsSignificand, bit - 1);
822 APInt::tcShiftLeft(dividend, partsCount, 1);
825 /* Figure out the lost fraction. */
826 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
829 lost_fraction = lfMoreThanHalf;
831 lost_fraction = lfExactlyHalf;
832 else if(APInt::tcIsZero(dividend, partsCount))
833 lost_fraction = lfExactlyZero;
835 lost_fraction = lfLessThanHalf;
840 return lost_fraction;
844 APFloat::significandMSB() const
846 return APInt::tcMSB(significandParts(), partCount());
850 APFloat::significandLSB() const
852 return APInt::tcLSB(significandParts(), partCount());
855 /* Note that a zero result is NOT normalized to fcZero. */
857 APFloat::shiftSignificandRight(unsigned int bits)
859 /* Our exponent should not overflow. */
860 assert((exponent_t) (exponent + bits) >= exponent);
864 return shiftRight(significandParts(), partCount(), bits);
867 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
869 APFloat::shiftSignificandLeft(unsigned int bits)
871 assert(bits < semantics->precision);
874 unsigned int partsCount = partCount();
876 APInt::tcShiftLeft(significandParts(), partsCount, bits);
879 assert(!APInt::tcIsZero(significandParts(), partsCount));
884 APFloat::compareAbsoluteValue(const APFloat &rhs) const
888 assert(semantics == rhs.semantics);
889 assert(category == fcNormal);
890 assert(rhs.category == fcNormal);
892 compare = exponent - rhs.exponent;
894 /* If exponents are equal, do an unsigned bignum comparison of the
897 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
901 return cmpGreaterThan;
908 /* Handle overflow. Sign is preserved. We either become infinity or
909 the largest finite number. */
911 APFloat::handleOverflow(roundingMode rounding_mode)
914 if(rounding_mode == rmNearestTiesToEven
915 || rounding_mode == rmNearestTiesToAway
916 || (rounding_mode == rmTowardPositive && !sign)
917 || (rounding_mode == rmTowardNegative && sign))
919 category = fcInfinity;
920 return (opStatus) (opOverflow | opInexact);
923 /* Otherwise we become the largest finite number. */
925 exponent = semantics->maxExponent;
926 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
927 semantics->precision);
932 /* Returns TRUE if, when truncating the current number, with BIT the
933 new LSB, with the given lost fraction and rounding mode, the result
934 would need to be rounded away from zero (i.e., by increasing the
935 signficand). This routine must work for fcZero of both signs, and
938 APFloat::roundAwayFromZero(roundingMode rounding_mode,
939 lostFraction lost_fraction,
940 unsigned int bit) const
942 /* NaNs and infinities should not have lost fractions. */
943 assert(category == fcNormal || category == fcZero);
945 /* Current callers never pass this so we don't handle it. */
946 assert(lost_fraction != lfExactlyZero);
948 switch(rounding_mode) {
952 case rmNearestTiesToAway:
953 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
955 case rmNearestTiesToEven:
956 if(lost_fraction == lfMoreThanHalf)
959 /* Our zeroes don't have a significand to test. */
960 if(lost_fraction == lfExactlyHalf && category != fcZero)
961 return APInt::tcExtractBit(significandParts(), bit);
968 case rmTowardPositive:
969 return sign == false;
971 case rmTowardNegative:
977 APFloat::normalize(roundingMode rounding_mode,
978 lostFraction lost_fraction)
980 unsigned int omsb; /* One, not zero, based MSB. */
983 if(category != fcNormal)
986 /* Before rounding normalize the exponent of fcNormal numbers. */
987 omsb = significandMSB() + 1;
990 /* OMSB is numbered from 1. We want to place it in the integer
991 bit numbered PRECISON if possible, with a compensating change in
993 exponentChange = omsb - semantics->precision;
995 /* If the resulting exponent is too high, overflow according to
996 the rounding mode. */
997 if(exponent + exponentChange > semantics->maxExponent)
998 return handleOverflow(rounding_mode);
1000 /* Subnormal numbers have exponent minExponent, and their MSB
1001 is forced based on that. */
1002 if(exponent + exponentChange < semantics->minExponent)
1003 exponentChange = semantics->minExponent - exponent;
1005 /* Shifting left is easy as we don't lose precision. */
1006 if(exponentChange < 0) {
1007 assert(lost_fraction == lfExactlyZero);
1009 shiftSignificandLeft(-exponentChange);
1014 if(exponentChange > 0) {
1017 /* Shift right and capture any new lost fraction. */
1018 lf = shiftSignificandRight(exponentChange);
1020 lost_fraction = combineLostFractions(lf, lost_fraction);
1022 /* Keep OMSB up-to-date. */
1023 if(omsb > (unsigned) exponentChange)
1024 omsb -= exponentChange;
1030 /* Now round the number according to rounding_mode given the lost
1033 /* As specified in IEEE 754, since we do not trap we do not report
1034 underflow for exact results. */
1035 if(lost_fraction == lfExactlyZero) {
1036 /* Canonicalize zeroes. */
1043 /* Increment the significand if we're rounding away from zero. */
1044 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1046 exponent = semantics->minExponent;
1048 incrementSignificand();
1049 omsb = significandMSB() + 1;
1051 /* Did the significand increment overflow? */
1052 if(omsb == (unsigned) semantics->precision + 1) {
1053 /* Renormalize by incrementing the exponent and shifting our
1054 significand right one. However if we already have the
1055 maximum exponent we overflow to infinity. */
1056 if(exponent == semantics->maxExponent) {
1057 category = fcInfinity;
1059 return (opStatus) (opOverflow | opInexact);
1062 shiftSignificandRight(1);
1068 /* The normal case - we were and are not denormal, and any
1069 significand increment above didn't overflow. */
1070 if(omsb == semantics->precision)
1073 /* We have a non-zero denormal. */
1074 assert(omsb < semantics->precision);
1076 /* Canonicalize zeroes. */
1080 /* The fcZero case is a denormal that underflowed to zero. */
1081 return (opStatus) (opUnderflow | opInexact);
1085 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1087 switch(convolve(category, rhs.category)) {
1091 case convolve(fcNaN, fcZero):
1092 case convolve(fcNaN, fcNormal):
1093 case convolve(fcNaN, fcInfinity):
1094 case convolve(fcNaN, fcNaN):
1095 case convolve(fcNormal, fcZero):
1096 case convolve(fcInfinity, fcNormal):
1097 case convolve(fcInfinity, fcZero):
1100 case convolve(fcZero, fcNaN):
1101 case convolve(fcNormal, fcNaN):
1102 case convolve(fcInfinity, fcNaN):
1104 copySignificand(rhs);
1107 case convolve(fcNormal, fcInfinity):
1108 case convolve(fcZero, fcInfinity):
1109 category = fcInfinity;
1110 sign = rhs.sign ^ subtract;
1113 case convolve(fcZero, fcNormal):
1115 sign = rhs.sign ^ subtract;
1118 case convolve(fcZero, fcZero):
1119 /* Sign depends on rounding mode; handled by caller. */
1122 case convolve(fcInfinity, fcInfinity):
1123 /* Differently signed infinities can only be validly
1125 if(sign ^ rhs.sign != subtract) {
1127 // Arbitrary but deterministic value for significand
1128 APInt::tcSet(significandParts(), ~0U, partCount());
1134 case convolve(fcNormal, fcNormal):
1139 /* Add or subtract two normal numbers. */
1141 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1144 lostFraction lost_fraction;
1147 /* Determine if the operation on the absolute values is effectively
1148 an addition or subtraction. */
1149 subtract ^= (sign ^ rhs.sign);
1151 /* Are we bigger exponent-wise than the RHS? */
1152 bits = exponent - rhs.exponent;
1154 /* Subtraction is more subtle than one might naively expect. */
1156 APFloat temp_rhs(rhs);
1160 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1161 lost_fraction = lfExactlyZero;
1162 } else if (bits > 0) {
1163 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1164 shiftSignificandLeft(1);
1167 lost_fraction = shiftSignificandRight(-bits - 1);
1168 temp_rhs.shiftSignificandLeft(1);
1173 carry = temp_rhs.subtractSignificand
1174 (*this, lost_fraction != lfExactlyZero);
1175 copySignificand(temp_rhs);
1178 carry = subtractSignificand
1179 (temp_rhs, lost_fraction != lfExactlyZero);
1182 /* Invert the lost fraction - it was on the RHS and
1184 if(lost_fraction == lfLessThanHalf)
1185 lost_fraction = lfMoreThanHalf;
1186 else if(lost_fraction == lfMoreThanHalf)
1187 lost_fraction = lfLessThanHalf;
1189 /* The code above is intended to ensure that no borrow is
1194 APFloat temp_rhs(rhs);
1196 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1197 carry = addSignificand(temp_rhs);
1199 lost_fraction = shiftSignificandRight(-bits);
1200 carry = addSignificand(rhs);
1203 /* We have a guard bit; generating a carry cannot happen. */
1207 return lost_fraction;
1211 APFloat::multiplySpecials(const APFloat &rhs)
1213 switch(convolve(category, rhs.category)) {
1217 case convolve(fcNaN, fcZero):
1218 case convolve(fcNaN, fcNormal):
1219 case convolve(fcNaN, fcInfinity):
1220 case convolve(fcNaN, fcNaN):
1223 case convolve(fcZero, fcNaN):
1224 case convolve(fcNormal, fcNaN):
1225 case convolve(fcInfinity, fcNaN):
1227 copySignificand(rhs);
1230 case convolve(fcNormal, fcInfinity):
1231 case convolve(fcInfinity, fcNormal):
1232 case convolve(fcInfinity, fcInfinity):
1233 category = fcInfinity;
1236 case convolve(fcZero, fcNormal):
1237 case convolve(fcNormal, fcZero):
1238 case convolve(fcZero, fcZero):
1242 case convolve(fcZero, fcInfinity):
1243 case convolve(fcInfinity, fcZero):
1245 // Arbitrary but deterministic value for significand
1246 APInt::tcSet(significandParts(), ~0U, partCount());
1249 case convolve(fcNormal, fcNormal):
1255 APFloat::divideSpecials(const APFloat &rhs)
1257 switch(convolve(category, rhs.category)) {
1261 case convolve(fcNaN, fcZero):
1262 case convolve(fcNaN, fcNormal):
1263 case convolve(fcNaN, fcInfinity):
1264 case convolve(fcNaN, fcNaN):
1265 case convolve(fcInfinity, fcZero):
1266 case convolve(fcInfinity, fcNormal):
1267 case convolve(fcZero, fcInfinity):
1268 case convolve(fcZero, fcNormal):
1271 case convolve(fcZero, fcNaN):
1272 case convolve(fcNormal, fcNaN):
1273 case convolve(fcInfinity, fcNaN):
1275 copySignificand(rhs);
1278 case convolve(fcNormal, fcInfinity):
1282 case convolve(fcNormal, fcZero):
1283 category = fcInfinity;
1286 case convolve(fcInfinity, fcInfinity):
1287 case convolve(fcZero, fcZero):
1289 // Arbitrary but deterministic value for significand
1290 APInt::tcSet(significandParts(), ~0U, partCount());
1293 case convolve(fcNormal, fcNormal):
1300 APFloat::changeSign()
1302 /* Look mummy, this one's easy. */
1307 APFloat::clearSign()
1309 /* So is this one. */
1314 APFloat::copySign(const APFloat &rhs)
1320 /* Normalized addition or subtraction. */
1322 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1327 fs = addOrSubtractSpecials(rhs, subtract);
1329 /* This return code means it was not a simple case. */
1330 if(fs == opDivByZero) {
1331 lostFraction lost_fraction;
1333 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1334 fs = normalize(rounding_mode, lost_fraction);
1336 /* Can only be zero if we lost no fraction. */
1337 assert(category != fcZero || lost_fraction == lfExactlyZero);
1340 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1341 positive zero unless rounding to minus infinity, except that
1342 adding two like-signed zeroes gives that zero. */
1343 if(category == fcZero) {
1344 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1345 sign = (rounding_mode == rmTowardNegative);
1351 /* Normalized addition. */
1353 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1355 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1356 "Compile-time arithmetic on PPC long double not supported yet");
1357 return addOrSubtract(rhs, rounding_mode, false);
1360 /* Normalized subtraction. */
1362 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1364 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1365 "Compile-time arithmetic on PPC long double not supported yet");
1366 return addOrSubtract(rhs, rounding_mode, true);
1369 /* Normalized multiply. */
1371 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1373 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1374 "Compile-time arithmetic on PPC long double not supported yet");
1378 fs = multiplySpecials(rhs);
1380 if(category == fcNormal) {
1381 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1382 fs = normalize(rounding_mode, lost_fraction);
1383 if(lost_fraction != lfExactlyZero)
1384 fs = (opStatus) (fs | opInexact);
1390 /* Normalized divide. */
1392 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1394 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1395 "Compile-time arithmetic on PPC long double not supported yet");
1399 fs = divideSpecials(rhs);
1401 if(category == fcNormal) {
1402 lostFraction lost_fraction = divideSignificand(rhs);
1403 fs = normalize(rounding_mode, lost_fraction);
1404 if(lost_fraction != lfExactlyZero)
1405 fs = (opStatus) (fs | opInexact);
1411 /* Normalized remainder. This is not currently doing TRT. */
1413 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1415 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1416 "Compile-time arithmetic on PPC long double not supported yet");
1419 unsigned int origSign = sign;
1420 fs = V.divide(rhs, rmNearestTiesToEven);
1421 if (fs == opDivByZero)
1424 int parts = partCount();
1425 integerPart *x = new integerPart[parts];
1426 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1427 rmNearestTiesToEven);
1428 if (fs==opInvalidOp)
1431 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1432 rmNearestTiesToEven);
1433 assert(fs==opOK); // should always work
1435 fs = V.multiply(rhs, rounding_mode);
1436 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1438 fs = subtract(V, rounding_mode);
1439 assert(fs==opOK || fs==opInexact); // likewise
1442 sign = origSign; // IEEE754 requires this
1447 /* Normalized fused-multiply-add. */
1449 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1450 const APFloat &addend,
1451 roundingMode rounding_mode)
1453 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1454 "Compile-time arithmetic on PPC long double not supported yet");
1457 /* Post-multiplication sign, before addition. */
1458 sign ^= multiplicand.sign;
1460 /* If and only if all arguments are normal do we need to do an
1461 extended-precision calculation. */
1462 if(category == fcNormal
1463 && multiplicand.category == fcNormal
1464 && addend.category == fcNormal) {
1465 lostFraction lost_fraction;
1467 lost_fraction = multiplySignificand(multiplicand, &addend);
1468 fs = normalize(rounding_mode, lost_fraction);
1469 if(lost_fraction != lfExactlyZero)
1470 fs = (opStatus) (fs | opInexact);
1472 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1473 positive zero unless rounding to minus infinity, except that
1474 adding two like-signed zeroes gives that zero. */
1475 if(category == fcZero && sign != addend.sign)
1476 sign = (rounding_mode == rmTowardNegative);
1478 fs = multiplySpecials(multiplicand);
1480 /* FS can only be opOK or opInvalidOp. There is no more work
1481 to do in the latter case. The IEEE-754R standard says it is
1482 implementation-defined in this case whether, if ADDEND is a
1483 quiet NaN, we raise invalid op; this implementation does so.
1485 If we need to do the addition we can do so with normal
1488 fs = addOrSubtract(addend, rounding_mode, false);
1494 /* Comparison requires normalized numbers. */
1496 APFloat::compare(const APFloat &rhs) const
1498 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1499 "Compile-time arithmetic on PPC long double not supported yet");
1502 assert(semantics == rhs.semantics);
1504 switch(convolve(category, rhs.category)) {
1508 case convolve(fcNaN, fcZero):
1509 case convolve(fcNaN, fcNormal):
1510 case convolve(fcNaN, fcInfinity):
1511 case convolve(fcNaN, fcNaN):
1512 case convolve(fcZero, fcNaN):
1513 case convolve(fcNormal, fcNaN):
1514 case convolve(fcInfinity, fcNaN):
1515 return cmpUnordered;
1517 case convolve(fcInfinity, fcNormal):
1518 case convolve(fcInfinity, fcZero):
1519 case convolve(fcNormal, fcZero):
1523 return cmpGreaterThan;
1525 case convolve(fcNormal, fcInfinity):
1526 case convolve(fcZero, fcInfinity):
1527 case convolve(fcZero, fcNormal):
1529 return cmpGreaterThan;
1533 case convolve(fcInfinity, fcInfinity):
1534 if(sign == rhs.sign)
1539 return cmpGreaterThan;
1541 case convolve(fcZero, fcZero):
1544 case convolve(fcNormal, fcNormal):
1548 /* Two normal numbers. Do they have the same sign? */
1549 if(sign != rhs.sign) {
1551 result = cmpLessThan;
1553 result = cmpGreaterThan;
1555 /* Compare absolute values; invert result if negative. */
1556 result = compareAbsoluteValue(rhs);
1559 if(result == cmpLessThan)
1560 result = cmpGreaterThan;
1561 else if(result == cmpGreaterThan)
1562 result = cmpLessThan;
1570 APFloat::convert(const fltSemantics &toSemantics,
1571 roundingMode rounding_mode)
1573 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1574 "Compile-time arithmetic on PPC long double not supported yet");
1575 lostFraction lostFraction;
1576 unsigned int newPartCount, oldPartCount;
1579 lostFraction = lfExactlyZero;
1580 newPartCount = partCountForBits(toSemantics.precision + 1);
1581 oldPartCount = partCount();
1583 /* Handle storage complications. If our new form is wider,
1584 re-allocate our bit pattern into wider storage. If it is
1585 narrower, we ignore the excess parts, but if narrowing to a
1586 single part we need to free the old storage.
1587 Be careful not to reference significandParts for zeroes
1588 and infinities, since it aborts. */
1589 if (newPartCount > oldPartCount) {
1590 integerPart *newParts;
1591 newParts = new integerPart[newPartCount];
1592 APInt::tcSet(newParts, 0, newPartCount);
1593 if (category==fcNormal || category==fcNaN)
1594 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1596 significand.parts = newParts;
1597 } else if (newPartCount < oldPartCount) {
1598 /* Capture any lost fraction through truncation of parts so we get
1599 correct rounding whilst normalizing. */
1600 if (category==fcNormal)
1601 lostFraction = lostFractionThroughTruncation
1602 (significandParts(), oldPartCount, toSemantics.precision);
1603 if (newPartCount == 1) {
1604 integerPart newPart = 0;
1605 if (category==fcNormal || category==fcNaN)
1606 newPart = significandParts()[0];
1608 significand.part = newPart;
1612 if(category == fcNormal) {
1613 /* Re-interpret our bit-pattern. */
1614 exponent += toSemantics.precision - semantics->precision;
1615 semantics = &toSemantics;
1616 fs = normalize(rounding_mode, lostFraction);
1617 } else if (category == fcNaN) {
1618 int shift = toSemantics.precision - semantics->precision;
1619 // No normalization here, just truncate
1621 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1623 APInt::tcShiftRight(significandParts(), newPartCount, -shift);
1624 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1625 // does not give you back the same bits. This is dubious, and we
1626 // don't currently do it. You're really supposed to get
1627 // an invalid operation signal at runtime, but nobody does that.
1628 semantics = &toSemantics;
1631 semantics = &toSemantics;
1638 /* Convert a floating point number to an integer according to the
1639 rounding mode. If the rounded integer value is out of range this
1640 returns an invalid operation exception. If the rounded value is in
1641 range but the floating point number is not the exact integer, the C
1642 standard doesn't require an inexact exception to be raised. IEEE
1643 854 does require it so we do that.
1645 Note that for conversions to integer type the C standard requires
1646 round-to-zero to always be used. */
1648 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1650 roundingMode rounding_mode) const
1652 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1653 "Compile-time arithmetic on PPC long double not supported yet");
1654 lostFraction lost_fraction;
1655 unsigned int msb, partsCount;
1658 partsCount = partCountForBits(width);
1660 /* Handle the three special cases first. We produce
1661 a deterministic result even for the Invalid cases. */
1662 if (category == fcNaN) {
1663 // Neither sign nor isSigned affects this.
1664 APInt::tcSet(parts, 0, partsCount);
1667 if (category == fcInfinity) {
1668 if (!sign && isSigned)
1669 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1670 else if (!sign && !isSigned)
1671 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1672 else if (sign && isSigned) {
1673 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1674 APInt::tcShiftLeft(parts, partsCount, width-1);
1675 } else // sign && !isSigned
1676 APInt::tcSet(parts, 0, partsCount);
1679 if (category == fcZero) {
1680 APInt::tcSet(parts, 0, partsCount);
1684 /* Shift the bit pattern so the fraction is lost. */
1687 bits = (int) semantics->precision - 1 - exponent;
1690 lost_fraction = tmp.shiftSignificandRight(bits);
1692 if (-bits >= semantics->precision) {
1693 // Unrepresentably large.
1694 if (!sign && isSigned)
1695 APInt::tcSetLeastSignificantBits(parts, partsCount, width-1);
1696 else if (!sign && !isSigned)
1697 APInt::tcSetLeastSignificantBits(parts, partsCount, width);
1698 else if (sign && isSigned) {
1699 APInt::tcSetLeastSignificantBits(parts, partsCount, 1);
1700 APInt::tcShiftLeft(parts, partsCount, width-1);
1701 } else // sign && !isSigned
1702 APInt::tcSet(parts, 0, partsCount);
1703 return (opStatus)(opOverflow | opInexact);
1705 tmp.shiftSignificandLeft(-bits);
1706 lost_fraction = lfExactlyZero;
1709 if(lost_fraction != lfExactlyZero
1710 && tmp.roundAwayFromZero(rounding_mode, lost_fraction, 0))
1711 tmp.incrementSignificand();
1713 msb = tmp.significandMSB();
1715 /* Negative numbers cannot be represented as unsigned. */
1716 if(!isSigned && tmp.sign && msb != -1U)
1719 /* It takes exponent + 1 bits to represent the truncated floating
1720 point number without its sign. We lose a bit for the sign, but
1721 the maximally negative integer is a special case. */
1722 if(msb + 1 > width) /* !! Not same as msb >= width !! */
1725 if(isSigned && msb + 1 == width
1726 && (!tmp.sign || tmp.significandLSB() != msb))
1729 APInt::tcAssign(parts, tmp.significandParts(), partsCount);
1732 APInt::tcNegate(parts, partsCount);
1734 if(lost_fraction == lfExactlyZero)
1740 /* Convert an unsigned integer SRC to a floating point number,
1741 rounding according to ROUNDING_MODE. The sign of the floating
1742 point number is not modified. */
1744 APFloat::convertFromUnsignedParts(const integerPart *src,
1745 unsigned int srcCount,
1746 roundingMode rounding_mode)
1748 unsigned int omsb, precision, dstCount;
1750 lostFraction lost_fraction;
1752 category = fcNormal;
1753 omsb = APInt::tcMSB(src, srcCount) + 1;
1754 dst = significandParts();
1755 dstCount = partCount();
1756 precision = semantics->precision;
1758 /* We want the most significant PRECISON bits of SRC. There may not
1759 be that many; extract what we can. */
1760 if (precision <= omsb) {
1761 exponent = omsb - 1;
1762 lost_fraction = lostFractionThroughTruncation(src, srcCount,
1764 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
1766 exponent = precision - 1;
1767 lost_fraction = lfExactlyZero;
1768 APInt::tcExtract(dst, dstCount, src, omsb, 0);
1771 return normalize(rounding_mode, lost_fraction);
1774 /* Convert a two's complement integer SRC to a floating point number,
1775 rounding according to ROUNDING_MODE. ISSIGNED is true if the
1776 integer is signed, in which case it must be sign-extended. */
1778 APFloat::convertFromSignExtendedInteger(const integerPart *src,
1779 unsigned int srcCount,
1781 roundingMode rounding_mode)
1783 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1784 "Compile-time arithmetic on PPC long double not supported yet");
1788 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
1791 /* If we're signed and negative negate a copy. */
1793 copy = new integerPart[srcCount];
1794 APInt::tcAssign(copy, src, srcCount);
1795 APInt::tcNegate(copy, srcCount);
1796 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
1800 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
1806 /* FIXME: should this just take a const APInt reference? */
1808 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
1809 unsigned int width, bool isSigned,
1810 roundingMode rounding_mode)
1812 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1813 "Compile-time arithmetic on PPC long double not supported yet");
1814 unsigned int partCount = partCountForBits(width);
1815 APInt api = APInt(width, partCount, parts);
1818 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1823 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
1827 APFloat::convertFromHexadecimalString(const char *p,
1828 roundingMode rounding_mode)
1830 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
1831 "Compile-time arithmetic on PPC long double not supported yet");
1832 lostFraction lost_fraction;
1833 integerPart *significand;
1834 unsigned int bitPos, partsCount;
1835 const char *dot, *firstSignificantDigit;
1839 category = fcNormal;
1841 significand = significandParts();
1842 partsCount = partCount();
1843 bitPos = partsCount * integerPartWidth;
1845 /* Skip leading zeroes and any (hexa)decimal point. */
1846 p = skipLeadingZeroesAndAnyDot(p, &dot);
1847 firstSignificantDigit = p;
1850 integerPart hex_value;
1857 hex_value = hexDigitValue(*p);
1858 if(hex_value == -1U) {
1859 lost_fraction = lfExactlyZero;
1865 /* Store the number whilst 4-bit nibbles remain. */
1868 hex_value <<= bitPos % integerPartWidth;
1869 significand[bitPos / integerPartWidth] |= hex_value;
1871 lost_fraction = trailingHexadecimalFraction(p, hex_value);
1872 while(hexDigitValue(*p) != -1U)
1878 /* Hex floats require an exponent but not a hexadecimal point. */
1879 assert(*p == 'p' || *p == 'P');
1881 /* Ignore the exponent if we are zero. */
1882 if(p != firstSignificantDigit) {
1885 /* Implicit hexadecimal point? */
1889 /* Calculate the exponent adjustment implicit in the number of
1890 significant digits. */
1891 expAdjustment = dot - firstSignificantDigit;
1892 if(expAdjustment < 0)
1894 expAdjustment = expAdjustment * 4 - 1;
1896 /* Adjust for writing the significand starting at the most
1897 significant nibble. */
1898 expAdjustment += semantics->precision;
1899 expAdjustment -= partsCount * integerPartWidth;
1901 /* Adjust for the given exponent. */
1902 exponent = totalExponent(p, expAdjustment);
1905 return normalize(rounding_mode, lost_fraction);
1909 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
1910 unsigned sigPartCount, int exp,
1911 roundingMode rounding_mode)
1913 unsigned int parts, pow5PartCount;
1914 fltSemantics calcSemantics = { 32767, -32767, 0 };
1915 integerPart pow5Parts[maxPowerOfFiveParts];
1918 isNearest = (rounding_mode == rmNearestTiesToEven
1919 || rounding_mode == rmNearestTiesToAway);
1921 parts = partCountForBits(semantics->precision + 11);
1923 /* Calculate pow(5, abs(exp)). */
1924 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
1926 for (;; parts *= 2) {
1927 opStatus sigStatus, powStatus;
1928 unsigned int excessPrecision, truncatedBits;
1930 calcSemantics.precision = parts * integerPartWidth - 1;
1931 excessPrecision = calcSemantics.precision - semantics->precision;
1932 truncatedBits = excessPrecision;
1934 APFloat decSig(calcSemantics, fcZero, sign);
1935 APFloat pow5(calcSemantics, fcZero, false);
1937 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
1938 rmNearestTiesToEven);
1939 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
1940 rmNearestTiesToEven);
1941 /* Add exp, as 10^n = 5^n * 2^n. */
1942 decSig.exponent += exp;
1944 lostFraction calcLostFraction;
1945 integerPart HUerr, HUdistance, powHUerr;
1948 /* multiplySignificand leaves the precision-th bit set to 1. */
1949 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
1950 powHUerr = powStatus != opOK;
1952 calcLostFraction = decSig.divideSignificand(pow5);
1953 /* Denormal numbers have less precision. */
1954 if (decSig.exponent < semantics->minExponent) {
1955 excessPrecision += (semantics->minExponent - decSig.exponent);
1956 truncatedBits = excessPrecision;
1957 if (excessPrecision > calcSemantics.precision)
1958 excessPrecision = calcSemantics.precision;
1960 /* Extra half-ulp lost in reciprocal of exponent. */
1961 powHUerr = 1 + powStatus != opOK;
1964 /* Both multiplySignificand and divideSignificand return the
1965 result with the integer bit set. */
1966 assert (APInt::tcExtractBit
1967 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
1969 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
1971 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
1972 excessPrecision, isNearest);
1974 /* Are we guaranteed to round correctly if we truncate? */
1975 if (HUdistance >= HUerr) {
1976 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
1977 calcSemantics.precision - excessPrecision,
1979 /* Take the exponent of decSig. If we tcExtract-ed less bits
1980 above we must adjust our exponent to compensate for the
1981 implicit right shift. */
1982 exponent = (decSig.exponent + semantics->precision
1983 - (calcSemantics.precision - excessPrecision));
1984 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
1987 return normalize(rounding_mode, calcLostFraction);
1993 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
1995 const char *dot, *firstSignificantDigit;
1996 integerPart val, maxVal, decValue;
1999 /* Skip leading zeroes and any decimal point. */
2000 p = skipLeadingZeroesAndAnyDot(p, &dot);
2001 firstSignificantDigit = p;
2003 /* The maximum number that can be multiplied by ten with any digit
2004 added without overflowing an integerPart. */
2005 maxVal = (~ (integerPart) 0 - 9) / 10;
2008 while (val <= maxVal) {
2014 decValue = digitValue(*p);
2015 if (decValue == -1U)
2018 val = val * 10 + decValue;
2021 integerPart *decSignificand;
2022 unsigned int partCount, maxPartCount;
2026 decSignificand = new integerPart[maxPartCount];
2027 decSignificand[partCount++] = val;
2029 /* Now continue to do single-part arithmetic for as long as we can.
2030 Then do a part multiplication, and repeat. */
2031 while (decValue != -1U) {
2032 integerPart multiplier;
2037 while (multiplier <= maxVal) {
2043 decValue = digitValue(*p);
2044 if (decValue == -1U)
2048 val = val * 10 + decValue;
2051 if (partCount == maxPartCount) {
2052 integerPart *newDecSignificand;
2053 newDecSignificand = new integerPart[maxPartCount = partCount * 2];
2054 APInt::tcAssign(newDecSignificand, decSignificand, partCount);
2055 delete [] decSignificand;
2056 decSignificand = newDecSignificand;
2059 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2060 partCount, partCount + 1, false);
2062 /* If we used another part (likely), increase the count. */
2063 if (decSignificand[partCount] != 0)
2067 /* Now decSignificand contains the supplied significand ignoring the
2068 decimal point. Figure out our effective exponent, which is the
2069 specified exponent adjusted for any decimal point. */
2071 if (p == firstSignificantDigit) {
2072 /* Ignore the exponent if we are zero - we cannot overflow. */
2076 int decimalExponent;
2079 decimalExponent = dot + 1 - p;
2081 decimalExponent = 0;
2083 /* Add the given exponent. */
2084 if (*p == 'e' || *p == 'E')
2085 decimalExponent = totalExponent(p, decimalExponent);
2087 category = fcNormal;
2088 fs = roundSignificandWithExponent(decSignificand, partCount,
2089 decimalExponent, rounding_mode);
2092 delete [] decSignificand;
2098 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2100 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
2101 "Compile-time arithmetic on PPC long double not supported yet");
2102 /* Handle a leading minus sign. */
2108 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2109 return convertFromHexadecimalString(p + 2, rounding_mode);
2111 return convertFromDecimalString(p, rounding_mode);
2114 /* Write out a hexadecimal representation of the floating point value
2115 to DST, which must be of sufficient size, in the C99 form
2116 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2117 excluding the terminating NUL.
2119 If UPPERCASE, the output is in upper case, otherwise in lower case.
2121 HEXDIGITS digits appear altogether, rounding the value if
2122 necessary. If HEXDIGITS is 0, the minimal precision to display the
2123 number precisely is used instead. If nothing would appear after
2124 the decimal point it is suppressed.
2126 The decimal exponent is always printed and has at least one digit.
2127 Zero values display an exponent of zero. Infinities and NaNs
2128 appear as "infinity" or "nan" respectively.
2130 The above rules are as specified by C99. There is ambiguity about
2131 what the leading hexadecimal digit should be. This implementation
2132 uses whatever is necessary so that the exponent is displayed as
2133 stored. This implies the exponent will fall within the IEEE format
2134 range, and the leading hexadecimal digit will be 0 (for denormals),
2135 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2136 any other digits zero).
2139 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2140 bool upperCase, roundingMode rounding_mode) const
2142 assert(semantics != (const llvm::fltSemantics* const)&PPCDoubleDouble &&
2143 "Compile-time arithmetic on PPC long double not supported yet");
2152 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2153 dst += sizeof infinityL - 1;
2157 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2158 dst += sizeof NaNU - 1;
2163 *dst++ = upperCase ? 'X': 'x';
2165 if (hexDigits > 1) {
2167 memset (dst, '0', hexDigits - 1);
2168 dst += hexDigits - 1;
2170 *dst++ = upperCase ? 'P': 'p';
2175 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2184 /* Does the hard work of outputting the correctly rounded hexadecimal
2185 form of a normal floating point number with the specified number of
2186 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2187 digits necessary to print the value precisely is output. */
2189 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2191 roundingMode rounding_mode) const
2193 unsigned int count, valueBits, shift, partsCount, outputDigits;
2194 const char *hexDigitChars;
2195 const integerPart *significand;
2200 *dst++ = upperCase ? 'X': 'x';
2203 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2205 significand = significandParts();
2206 partsCount = partCount();
2208 /* +3 because the first digit only uses the single integer bit, so
2209 we have 3 virtual zero most-significant-bits. */
2210 valueBits = semantics->precision + 3;
2211 shift = integerPartWidth - valueBits % integerPartWidth;
2213 /* The natural number of digits required ignoring trailing
2214 insignificant zeroes. */
2215 outputDigits = (valueBits - significandLSB () + 3) / 4;
2217 /* hexDigits of zero means use the required number for the
2218 precision. Otherwise, see if we are truncating. If we are,
2219 find out if we need to round away from zero. */
2221 if (hexDigits < outputDigits) {
2222 /* We are dropping non-zero bits, so need to check how to round.
2223 "bits" is the number of dropped bits. */
2225 lostFraction fraction;
2227 bits = valueBits - hexDigits * 4;
2228 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2229 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2231 outputDigits = hexDigits;
2234 /* Write the digits consecutively, and start writing in the location
2235 of the hexadecimal point. We move the most significant digit
2236 left and add the hexadecimal point later. */
2239 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2241 while (outputDigits && count) {
2244 /* Put the most significant integerPartWidth bits in "part". */
2245 if (--count == partsCount)
2246 part = 0; /* An imaginary higher zero part. */
2248 part = significand[count] << shift;
2251 part |= significand[count - 1] >> (integerPartWidth - shift);
2253 /* Convert as much of "part" to hexdigits as we can. */
2254 unsigned int curDigits = integerPartWidth / 4;
2256 if (curDigits > outputDigits)
2257 curDigits = outputDigits;
2258 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2259 outputDigits -= curDigits;
2265 /* Note that hexDigitChars has a trailing '0'. */
2268 *q = hexDigitChars[hexDigitValue (*q) + 1];
2269 } while (*q == '0');
2272 /* Add trailing zeroes. */
2273 memset (dst, '0', outputDigits);
2274 dst += outputDigits;
2277 /* Move the most significant digit to before the point, and if there
2278 is something after the decimal point add it. This must come
2279 after rounding above. */
2286 /* Finally output the exponent. */
2287 *dst++ = upperCase ? 'P': 'p';
2289 return writeSignedDecimal (dst, exponent);
2292 // For good performance it is desirable for different APFloats
2293 // to produce different integers.
2295 APFloat::getHashValue() const
2297 if (category==fcZero) return sign<<8 | semantics->precision ;
2298 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2299 else if (category==fcNaN) return 1<<10 | semantics->precision;
2301 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2302 const integerPart* p = significandParts();
2303 for (int i=partCount(); i>0; i--, p++)
2304 hash ^= ((uint32_t)*p) ^ (*p)>>32;
2309 // Conversion from APFloat to/from host float/double. It may eventually be
2310 // possible to eliminate these and have everybody deal with APFloats, but that
2311 // will take a while. This approach will not easily extend to long double.
2312 // Current implementation requires integerPartWidth==64, which is correct at
2313 // the moment but could be made more general.
2315 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2316 // the actual IEEE respresentations. We compensate for that here.
2319 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2321 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended);
2322 assert (partCount()==2);
2324 uint64_t myexponent, mysignificand;
2326 if (category==fcNormal) {
2327 myexponent = exponent+16383; //bias
2328 mysignificand = significandParts()[0];
2329 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2330 myexponent = 0; // denormal
2331 } else if (category==fcZero) {
2334 } else if (category==fcInfinity) {
2335 myexponent = 0x7fff;
2336 mysignificand = 0x8000000000000000ULL;
2338 assert(category == fcNaN && "Unknown category");
2339 myexponent = 0x7fff;
2340 mysignificand = significandParts()[0];
2344 words[0] = (((uint64_t)sign & 1) << 63) |
2345 ((myexponent & 0x7fff) << 48) |
2346 ((mysignificand >>16) & 0xffffffffffffLL);
2347 words[1] = mysignificand & 0xffff;
2348 return APInt(80, 2, words);
2352 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2354 assert(semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble);
2355 assert (partCount()==2);
2357 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2359 if (category==fcNormal) {
2360 myexponent = exponent + 1023; //bias
2361 myexponent2 = exponent2 + 1023;
2362 mysignificand = significandParts()[0];
2363 mysignificand2 = significandParts()[1];
2364 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2365 myexponent = 0; // denormal
2366 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2367 myexponent2 = 0; // denormal
2368 } else if (category==fcZero) {
2373 } else if (category==fcInfinity) {
2379 assert(category == fcNaN && "Unknown category");
2381 mysignificand = significandParts()[0];
2382 myexponent2 = exponent2;
2383 mysignificand2 = significandParts()[1];
2387 words[0] = (((uint64_t)sign & 1) << 63) |
2388 ((myexponent & 0x7ff) << 52) |
2389 (mysignificand & 0xfffffffffffffLL);
2390 words[1] = (((uint64_t)sign2 & 1) << 63) |
2391 ((myexponent2 & 0x7ff) << 52) |
2392 (mysignificand2 & 0xfffffffffffffLL);
2393 return APInt(128, 2, words);
2397 APFloat::convertDoubleAPFloatToAPInt() const
2399 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2400 assert (partCount()==1);
2402 uint64_t myexponent, mysignificand;
2404 if (category==fcNormal) {
2405 myexponent = exponent+1023; //bias
2406 mysignificand = *significandParts();
2407 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2408 myexponent = 0; // denormal
2409 } else if (category==fcZero) {
2412 } else if (category==fcInfinity) {
2416 assert(category == fcNaN && "Unknown category!");
2418 mysignificand = *significandParts();
2421 return APInt(64, (((((uint64_t)sign & 1) << 63) |
2422 ((myexponent & 0x7ff) << 52) |
2423 (mysignificand & 0xfffffffffffffLL))));
2427 APFloat::convertFloatAPFloatToAPInt() const
2429 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2430 assert (partCount()==1);
2432 uint32_t myexponent, mysignificand;
2434 if (category==fcNormal) {
2435 myexponent = exponent+127; //bias
2436 mysignificand = *significandParts();
2437 if (myexponent == 1 && !(mysignificand & 0x400000))
2438 myexponent = 0; // denormal
2439 } else if (category==fcZero) {
2442 } else if (category==fcInfinity) {
2446 assert(category == fcNaN && "Unknown category!");
2448 mysignificand = *significandParts();
2451 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2452 (mysignificand & 0x7fffff)));
2455 // This function creates an APInt that is just a bit map of the floating
2456 // point constant as it would appear in memory. It is not a conversion,
2457 // and treating the result as a normal integer is unlikely to be useful.
2460 APFloat::convertToAPInt() const
2462 if (semantics == (const llvm::fltSemantics* const)&IEEEsingle)
2463 return convertFloatAPFloatToAPInt();
2465 if (semantics == (const llvm::fltSemantics* const)&IEEEdouble)
2466 return convertDoubleAPFloatToAPInt();
2468 if (semantics == (const llvm::fltSemantics* const)&PPCDoubleDouble)
2469 return convertPPCDoubleDoubleAPFloatToAPInt();
2471 assert(semantics == (const llvm::fltSemantics* const)&x87DoubleExtended &&
2473 return convertF80LongDoubleAPFloatToAPInt();
2477 APFloat::convertToFloat() const
2479 assert(semantics == (const llvm::fltSemantics* const)&IEEEsingle);
2480 APInt api = convertToAPInt();
2481 return api.bitsToFloat();
2485 APFloat::convertToDouble() const
2487 assert(semantics == (const llvm::fltSemantics* const)&IEEEdouble);
2488 APInt api = convertToAPInt();
2489 return api.bitsToDouble();
2492 /// Integer bit is explicit in this format. Current Intel book does not
2493 /// define meaning of:
2494 /// exponent = all 1's, integer bit not set.
2495 /// exponent = 0, integer bit set. (formerly "psuedodenormals")
2496 /// exponent!=0 nor all 1's, integer bit not set. (formerly "unnormals")
2498 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2500 assert(api.getBitWidth()==80);
2501 uint64_t i1 = api.getRawData()[0];
2502 uint64_t i2 = api.getRawData()[1];
2503 uint64_t myexponent = (i1 >> 48) & 0x7fff;
2504 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
2507 initialize(&APFloat::x87DoubleExtended);
2508 assert(partCount()==2);
2511 if (myexponent==0 && mysignificand==0) {
2512 // exponent, significand meaningless
2514 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2515 // exponent, significand meaningless
2516 category = fcInfinity;
2517 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2518 // exponent meaningless
2520 significandParts()[0] = mysignificand;
2521 significandParts()[1] = 0;
2523 category = fcNormal;
2524 exponent = myexponent - 16383;
2525 significandParts()[0] = mysignificand;
2526 significandParts()[1] = 0;
2527 if (myexponent==0) // denormal
2533 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2535 assert(api.getBitWidth()==128);
2536 uint64_t i1 = api.getRawData()[0];
2537 uint64_t i2 = api.getRawData()[1];
2538 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2539 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2540 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2541 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2543 initialize(&APFloat::PPCDoubleDouble);
2544 assert(partCount()==2);
2548 if (myexponent==0 && mysignificand==0) {
2549 // exponent, significand meaningless
2550 // exponent2 and significand2 are required to be 0; we don't check
2552 } else if (myexponent==0x7ff && mysignificand==0) {
2553 // exponent, significand meaningless
2554 // exponent2 and significand2 are required to be 0; we don't check
2555 category = fcInfinity;
2556 } else if (myexponent==0x7ff && mysignificand!=0) {
2557 // exponent meaningless. So is the whole second word, but keep it
2560 exponent2 = myexponent2;
2561 significandParts()[0] = mysignificand;
2562 significandParts()[1] = mysignificand2;
2564 category = fcNormal;
2565 // Note there is no category2; the second word is treated as if it is
2566 // fcNormal, although it might be something else considered by itself.
2567 exponent = myexponent - 1023;
2568 exponent2 = myexponent2 - 1023;
2569 significandParts()[0] = mysignificand;
2570 significandParts()[1] = mysignificand2;
2571 if (myexponent==0) // denormal
2574 significandParts()[0] |= 0x10000000000000LL; // integer bit
2578 significandParts()[1] |= 0x10000000000000LL; // integer bit
2583 APFloat::initFromDoubleAPInt(const APInt &api)
2585 assert(api.getBitWidth()==64);
2586 uint64_t i = *api.getRawData();
2587 uint64_t myexponent = (i >> 52) & 0x7ff;
2588 uint64_t mysignificand = i & 0xfffffffffffffLL;
2590 initialize(&APFloat::IEEEdouble);
2591 assert(partCount()==1);
2594 if (myexponent==0 && mysignificand==0) {
2595 // exponent, significand meaningless
2597 } else if (myexponent==0x7ff && mysignificand==0) {
2598 // exponent, significand meaningless
2599 category = fcInfinity;
2600 } else if (myexponent==0x7ff && mysignificand!=0) {
2601 // exponent meaningless
2603 *significandParts() = mysignificand;
2605 category = fcNormal;
2606 exponent = myexponent - 1023;
2607 *significandParts() = mysignificand;
2608 if (myexponent==0) // denormal
2611 *significandParts() |= 0x10000000000000LL; // integer bit
2616 APFloat::initFromFloatAPInt(const APInt & api)
2618 assert(api.getBitWidth()==32);
2619 uint32_t i = (uint32_t)*api.getRawData();
2620 uint32_t myexponent = (i >> 23) & 0xff;
2621 uint32_t mysignificand = i & 0x7fffff;
2623 initialize(&APFloat::IEEEsingle);
2624 assert(partCount()==1);
2627 if (myexponent==0 && mysignificand==0) {
2628 // exponent, significand meaningless
2630 } else if (myexponent==0xff && mysignificand==0) {
2631 // exponent, significand meaningless
2632 category = fcInfinity;
2633 } else if (myexponent==0xff && mysignificand!=0) {
2634 // sign, exponent, significand meaningless
2636 *significandParts() = mysignificand;
2638 category = fcNormal;
2639 exponent = myexponent - 127; //bias
2640 *significandParts() = mysignificand;
2641 if (myexponent==0) // denormal
2644 *significandParts() |= 0x800000; // integer bit
2648 /// Treat api as containing the bits of a floating point number. Currently
2649 /// we infer the floating point type from the size of the APInt. The
2650 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2651 /// when the size is anything else).
2653 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2655 if (api.getBitWidth() == 32)
2656 return initFromFloatAPInt(api);
2657 else if (api.getBitWidth()==64)
2658 return initFromDoubleAPInt(api);
2659 else if (api.getBitWidth()==80)
2660 return initFromF80LongDoubleAPInt(api);
2661 else if (api.getBitWidth()==128 && !isIEEE)
2662 return initFromPPCDoubleDoubleAPInt(api);
2667 APFloat::APFloat(const APInt& api, bool isIEEE)
2669 initFromAPInt(api, isIEEE);
2672 APFloat::APFloat(float f)
2674 APInt api = APInt(32, 0);
2675 initFromAPInt(api.floatToBits(f));
2678 APFloat::APFloat(double d)
2680 APInt api = APInt(64, 0);
2681 initFromAPInt(api.doubleToBits(d));