1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/Support/MathExtras.h"
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
27 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
31 /* Represents floating point arithmetic semantics. */
33 /* The largest E such that 2^E is representable; this matches the
34 definition of IEEE 754. */
35 exponent_t maxExponent;
37 /* The smallest E such that 2^E is a normalized number; this
38 matches the definition of IEEE 754. */
39 exponent_t minExponent;
41 /* Number of bits in the significand. This includes the integer
43 unsigned int precision;
45 /* True if arithmetic is supported. */
46 unsigned int arithmeticOK;
49 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
50 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
51 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
52 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
53 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
55 // The PowerPC format consists of two doubles. It does not map cleanly
56 // onto the usual format above. For now only storage of constants of
57 // this type is supported, no arithmetic.
58 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
60 /* A tight upper bound on number of parts required to hold the value
63 power * 815 / (351 * integerPartWidth) + 1
65 However, whilst the result may require only this many parts,
66 because we are multiplying two values to get it, the
67 multiplication may require an extra part with the excess part
68 being zero (consider the trivial case of 1 * 1, tcFullMultiply
69 requires two parts to hold the single-part result). So we add an
70 extra one to guarantee enough space whilst multiplying. */
71 const unsigned int maxExponent = 16383;
72 const unsigned int maxPrecision = 113;
73 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
74 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
75 / (351 * integerPartWidth));
78 /* Put a bunch of private, handy routines in an anonymous namespace. */
81 static inline unsigned int
82 partCountForBits(unsigned int bits)
84 return ((bits) + integerPartWidth - 1) / integerPartWidth;
87 /* Returns 0U-9U. Return values >= 10U are not digits. */
88 static inline unsigned int
89 decDigitValue(unsigned int c)
95 hexDigitValue(unsigned int c)
115 assertArithmeticOK(const llvm::fltSemantics &semantics) {
116 assert(semantics.arithmeticOK
117 && "Compile-time arithmetic does not support these semantics");
120 /* Return the value of a decimal exponent of the form
123 If the exponent overflows, returns a large exponent with the
126 readExponent(const char *p)
129 unsigned int absExponent;
130 const unsigned int overlargeExponent = 24000; /* FIXME. */
132 isNegative = (*p == '-');
133 if (*p == '-' || *p == '+')
136 absExponent = decDigitValue(*p++);
137 assert (absExponent < 10U);
142 value = decDigitValue(*p);
147 value += absExponent * 10;
148 if (absExponent >= overlargeExponent) {
149 absExponent = overlargeExponent;
156 return -(int) absExponent;
158 return (int) absExponent;
161 /* This is ugly and needs cleaning up, but I don't immediately see
162 how whilst remaining safe. */
164 totalExponent(const char *p, int exponentAdjustment)
166 int unsignedExponent;
167 bool negative, overflow;
170 /* Move past the exponent letter and sign to the digits. */
172 negative = *p == '-';
173 if(*p == '-' || *p == '+')
176 unsignedExponent = 0;
181 value = decDigitValue(*p);
186 unsignedExponent = unsignedExponent * 10 + value;
187 if(unsignedExponent > 65535)
191 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
195 exponent = unsignedExponent;
197 exponent = -exponent;
198 exponent += exponentAdjustment;
199 if(exponent > 65535 || exponent < -65536)
204 exponent = negative ? -65536: 65535;
210 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
225 /* Given a normal decimal floating point number of the form
229 where the decimal point and exponent are optional, fill out the
230 structure D. Exponent is appropriate if the significand is
231 treated as an integer, and normalizedExponent if the significand
232 is taken to have the decimal point after a single leading
235 If the value is zero, V->firstSigDigit points to a non-digit, and
236 the return exponent is zero.
239 const char *firstSigDigit;
240 const char *lastSigDigit;
242 int normalizedExponent;
246 interpretDecimal(const char *p, decimalInfo *D)
250 p = skipLeadingZeroesAndAnyDot (p, &dot);
252 D->firstSigDigit = p;
254 D->normalizedExponent = 0;
261 if (decDigitValue(*p) >= 10U)
266 /* If number is all zerooes accept any exponent. */
267 if (p != D->firstSigDigit) {
268 if (*p == 'e' || *p == 'E')
269 D->exponent = readExponent(p + 1);
271 /* Implied decimal point? */
275 /* Drop insignificant trailing zeroes. */
282 /* Adjust the exponents for any decimal point. */
283 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
284 D->normalizedExponent = (D->exponent +
285 static_cast<exponent_t>((p - D->firstSigDigit)
286 - (dot > D->firstSigDigit && dot < p)));
292 /* Return the trailing fraction of a hexadecimal number.
293 DIGITVALUE is the first hex digit of the fraction, P points to
296 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
298 unsigned int hexDigit;
300 /* If the first trailing digit isn't 0 or 8 we can work out the
301 fraction immediately. */
303 return lfMoreThanHalf;
304 else if(digitValue < 8 && digitValue > 0)
305 return lfLessThanHalf;
307 /* Otherwise we need to find the first non-zero digit. */
311 hexDigit = hexDigitValue(*p);
313 /* If we ran off the end it is exactly zero or one-half, otherwise
316 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
318 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
321 /* Return the fraction lost were a bignum truncated losing the least
322 significant BITS bits. */
324 lostFractionThroughTruncation(const integerPart *parts,
325 unsigned int partCount,
330 lsb = APInt::tcLSB(parts, partCount);
332 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
334 return lfExactlyZero;
336 return lfExactlyHalf;
337 if(bits <= partCount * integerPartWidth
338 && APInt::tcExtractBit(parts, bits - 1))
339 return lfMoreThanHalf;
341 return lfLessThanHalf;
344 /* Shift DST right BITS bits noting lost fraction. */
346 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
348 lostFraction lost_fraction;
350 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
352 APInt::tcShiftRight(dst, parts, bits);
354 return lost_fraction;
357 /* Combine the effect of two lost fractions. */
359 combineLostFractions(lostFraction moreSignificant,
360 lostFraction lessSignificant)
362 if(lessSignificant != lfExactlyZero) {
363 if(moreSignificant == lfExactlyZero)
364 moreSignificant = lfLessThanHalf;
365 else if(moreSignificant == lfExactlyHalf)
366 moreSignificant = lfMoreThanHalf;
369 return moreSignificant;
372 /* The error from the true value, in half-ulps, on multiplying two
373 floating point numbers, which differ from the value they
374 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
375 than the returned value.
377 See "How to Read Floating Point Numbers Accurately" by William D
380 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
382 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
384 if (HUerr1 + HUerr2 == 0)
385 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
387 return inexactMultiply + 2 * (HUerr1 + HUerr2);
390 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
391 when the least significant BITS are truncated. BITS cannot be
394 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
396 unsigned int count, partBits;
397 integerPart part, boundary;
402 count = bits / integerPartWidth;
403 partBits = bits % integerPartWidth + 1;
405 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
408 boundary = (integerPart) 1 << (partBits - 1);
413 if (part - boundary <= boundary - part)
414 return part - boundary;
416 return boundary - part;
419 if (part == boundary) {
422 return ~(integerPart) 0; /* A lot. */
425 } else if (part == boundary - 1) {
428 return ~(integerPart) 0; /* A lot. */
433 return ~(integerPart) 0; /* A lot. */
436 /* Place pow(5, power) in DST, and return the number of parts used.
437 DST must be at least one part larger than size of the answer. */
439 powerOf5(integerPart *dst, unsigned int power)
441 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
443 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
444 static unsigned int partsCount[16] = { 1 };
446 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
449 assert(power <= maxExponent);
454 *p1 = firstEightPowers[power & 7];
460 for (unsigned int n = 0; power; power >>= 1, n++) {
465 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
467 pc = partsCount[n - 1];
468 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
470 if (pow5[pc - 1] == 0)
478 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
480 if (p2[result - 1] == 0)
483 /* Now result is in p1 with partsCount parts and p2 is scratch
485 tmp = p1, p1 = p2, p2 = tmp;
492 APInt::tcAssign(dst, p1, result);
497 /* Zero at the end to avoid modular arithmetic when adding one; used
498 when rounding up during hexadecimal output. */
499 static const char hexDigitsLower[] = "0123456789abcdef0";
500 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
501 static const char infinityL[] = "infinity";
502 static const char infinityU[] = "INFINITY";
503 static const char NaNL[] = "nan";
504 static const char NaNU[] = "NAN";
506 /* Write out an integerPart in hexadecimal, starting with the most
507 significant nibble. Write out exactly COUNT hexdigits, return
510 partAsHex (char *dst, integerPart part, unsigned int count,
511 const char *hexDigitChars)
513 unsigned int result = count;
515 assert (count != 0 && count <= integerPartWidth / 4);
517 part >>= (integerPartWidth - 4 * count);
519 dst[count] = hexDigitChars[part & 0xf];
526 /* Write out an unsigned decimal integer. */
528 writeUnsignedDecimal (char *dst, unsigned int n)
544 /* Write out a signed decimal integer. */
546 writeSignedDecimal (char *dst, int value)
550 dst = writeUnsignedDecimal(dst, -(unsigned) value);
552 dst = writeUnsignedDecimal(dst, value);
560 APFloat::initialize(const fltSemantics *ourSemantics)
564 semantics = ourSemantics;
567 significand.parts = new integerPart[count];
571 APFloat::freeSignificand()
574 delete [] significand.parts;
578 APFloat::assign(const APFloat &rhs)
580 assert(semantics == rhs.semantics);
583 category = rhs.category;
584 exponent = rhs.exponent;
586 exponent2 = rhs.exponent2;
587 if(category == fcNormal || category == fcNaN)
588 copySignificand(rhs);
592 APFloat::copySignificand(const APFloat &rhs)
594 assert(category == fcNormal || category == fcNaN);
595 assert(rhs.partCount() >= partCount());
597 APInt::tcAssign(significandParts(), rhs.significandParts(),
601 /* Make this number a NaN, with an arbitrary but deterministic value
602 for the significand. */
604 APFloat::makeNaN(void)
607 APInt::tcSet(significandParts(), ~0U, partCount());
611 APFloat::operator=(const APFloat &rhs)
614 if(semantics != rhs.semantics) {
616 initialize(rhs.semantics);
625 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
628 if (semantics != rhs.semantics ||
629 category != rhs.category ||
632 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
635 if (category==fcZero || category==fcInfinity)
637 else if (category==fcNormal && exponent!=rhs.exponent)
639 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
640 exponent2!=rhs.exponent2)
644 const integerPart* p=significandParts();
645 const integerPart* q=rhs.significandParts();
646 for (; i>0; i--, p++, q++) {
654 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
656 assertArithmeticOK(ourSemantics);
657 initialize(&ourSemantics);
660 exponent = ourSemantics.precision - 1;
661 significandParts()[0] = value;
662 normalize(rmNearestTiesToEven, lfExactlyZero);
665 APFloat::APFloat(const fltSemantics &ourSemantics,
666 fltCategory ourCategory, bool negative)
668 assertArithmeticOK(ourSemantics);
669 initialize(&ourSemantics);
670 category = ourCategory;
672 if(category == fcNormal)
674 else if (ourCategory == fcNaN)
678 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
680 assertArithmeticOK(ourSemantics);
681 initialize(&ourSemantics);
682 convertFromString(text, rmNearestTiesToEven);
685 APFloat::APFloat(const APFloat &rhs)
687 initialize(rhs.semantics);
696 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
697 void APFloat::Profile(FoldingSetNodeID& ID) const {
698 ID.Add(convertToAPInt());
702 APFloat::partCount() const
704 return partCountForBits(semantics->precision + 1);
708 APFloat::semanticsPrecision(const fltSemantics &semantics)
710 return semantics.precision;
714 APFloat::significandParts() const
716 return const_cast<APFloat *>(this)->significandParts();
720 APFloat::significandParts()
722 assert(category == fcNormal || category == fcNaN);
725 return significand.parts;
727 return &significand.part;
731 APFloat::zeroSignificand()
734 APInt::tcSet(significandParts(), 0, partCount());
737 /* Increment an fcNormal floating point number's significand. */
739 APFloat::incrementSignificand()
743 carry = APInt::tcIncrement(significandParts(), partCount());
745 /* Our callers should never cause us to overflow. */
749 /* Add the significand of the RHS. Returns the carry flag. */
751 APFloat::addSignificand(const APFloat &rhs)
755 parts = significandParts();
757 assert(semantics == rhs.semantics);
758 assert(exponent == rhs.exponent);
760 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
763 /* Subtract the significand of the RHS with a borrow flag. Returns
766 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
770 parts = significandParts();
772 assert(semantics == rhs.semantics);
773 assert(exponent == rhs.exponent);
775 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
779 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
780 on to the full-precision result of the multiplication. Returns the
783 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
785 unsigned int omsb; // One, not zero, based MSB.
786 unsigned int partsCount, newPartsCount, precision;
787 integerPart *lhsSignificand;
788 integerPart scratch[4];
789 integerPart *fullSignificand;
790 lostFraction lost_fraction;
792 assert(semantics == rhs.semantics);
794 precision = semantics->precision;
795 newPartsCount = partCountForBits(precision * 2);
797 if(newPartsCount > 4)
798 fullSignificand = new integerPart[newPartsCount];
800 fullSignificand = scratch;
802 lhsSignificand = significandParts();
803 partsCount = partCount();
805 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
806 rhs.significandParts(), partsCount, partsCount);
808 lost_fraction = lfExactlyZero;
809 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
810 exponent += rhs.exponent;
813 Significand savedSignificand = significand;
814 const fltSemantics *savedSemantics = semantics;
815 fltSemantics extendedSemantics;
817 unsigned int extendedPrecision;
819 /* Normalize our MSB. */
820 extendedPrecision = precision + precision - 1;
821 if(omsb != extendedPrecision)
823 APInt::tcShiftLeft(fullSignificand, newPartsCount,
824 extendedPrecision - omsb);
825 exponent -= extendedPrecision - omsb;
828 /* Create new semantics. */
829 extendedSemantics = *semantics;
830 extendedSemantics.precision = extendedPrecision;
832 if(newPartsCount == 1)
833 significand.part = fullSignificand[0];
835 significand.parts = fullSignificand;
836 semantics = &extendedSemantics;
838 APFloat extendedAddend(*addend);
839 status = extendedAddend.convert(extendedSemantics, rmTowardZero);
840 assert(status == opOK);
841 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
843 /* Restore our state. */
844 if(newPartsCount == 1)
845 fullSignificand[0] = significand.part;
846 significand = savedSignificand;
847 semantics = savedSemantics;
849 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
852 exponent -= (precision - 1);
854 if(omsb > precision) {
855 unsigned int bits, significantParts;
858 bits = omsb - precision;
859 significantParts = partCountForBits(omsb);
860 lf = shiftRight(fullSignificand, significantParts, bits);
861 lost_fraction = combineLostFractions(lf, lost_fraction);
865 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
867 if(newPartsCount > 4)
868 delete [] fullSignificand;
870 return lost_fraction;
873 /* Multiply the significands of LHS and RHS to DST. */
875 APFloat::divideSignificand(const APFloat &rhs)
877 unsigned int bit, i, partsCount;
878 const integerPart *rhsSignificand;
879 integerPart *lhsSignificand, *dividend, *divisor;
880 integerPart scratch[4];
881 lostFraction lost_fraction;
883 assert(semantics == rhs.semantics);
885 lhsSignificand = significandParts();
886 rhsSignificand = rhs.significandParts();
887 partsCount = partCount();
890 dividend = new integerPart[partsCount * 2];
894 divisor = dividend + partsCount;
896 /* Copy the dividend and divisor as they will be modified in-place. */
897 for(i = 0; i < partsCount; i++) {
898 dividend[i] = lhsSignificand[i];
899 divisor[i] = rhsSignificand[i];
900 lhsSignificand[i] = 0;
903 exponent -= rhs.exponent;
905 unsigned int precision = semantics->precision;
907 /* Normalize the divisor. */
908 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
911 APInt::tcShiftLeft(divisor, partsCount, bit);
914 /* Normalize the dividend. */
915 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
918 APInt::tcShiftLeft(dividend, partsCount, bit);
921 /* Ensure the dividend >= divisor initially for the loop below.
922 Incidentally, this means that the division loop below is
923 guaranteed to set the integer bit to one. */
924 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
926 APInt::tcShiftLeft(dividend, partsCount, 1);
927 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
931 for(bit = precision; bit; bit -= 1) {
932 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
933 APInt::tcSubtract(dividend, divisor, 0, partsCount);
934 APInt::tcSetBit(lhsSignificand, bit - 1);
937 APInt::tcShiftLeft(dividend, partsCount, 1);
940 /* Figure out the lost fraction. */
941 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
944 lost_fraction = lfMoreThanHalf;
946 lost_fraction = lfExactlyHalf;
947 else if(APInt::tcIsZero(dividend, partsCount))
948 lost_fraction = lfExactlyZero;
950 lost_fraction = lfLessThanHalf;
955 return lost_fraction;
959 APFloat::significandMSB() const
961 return APInt::tcMSB(significandParts(), partCount());
965 APFloat::significandLSB() const
967 return APInt::tcLSB(significandParts(), partCount());
970 /* Note that a zero result is NOT normalized to fcZero. */
972 APFloat::shiftSignificandRight(unsigned int bits)
974 /* Our exponent should not overflow. */
975 assert((exponent_t) (exponent + bits) >= exponent);
979 return shiftRight(significandParts(), partCount(), bits);
982 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
984 APFloat::shiftSignificandLeft(unsigned int bits)
986 assert(bits < semantics->precision);
989 unsigned int partsCount = partCount();
991 APInt::tcShiftLeft(significandParts(), partsCount, bits);
994 assert(!APInt::tcIsZero(significandParts(), partsCount));
999 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1003 assert(semantics == rhs.semantics);
1004 assert(category == fcNormal);
1005 assert(rhs.category == fcNormal);
1007 compare = exponent - rhs.exponent;
1009 /* If exponents are equal, do an unsigned bignum comparison of the
1012 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1016 return cmpGreaterThan;
1017 else if(compare < 0)
1023 /* Handle overflow. Sign is preserved. We either become infinity or
1024 the largest finite number. */
1026 APFloat::handleOverflow(roundingMode rounding_mode)
1029 if(rounding_mode == rmNearestTiesToEven
1030 || rounding_mode == rmNearestTiesToAway
1031 || (rounding_mode == rmTowardPositive && !sign)
1032 || (rounding_mode == rmTowardNegative && sign))
1034 category = fcInfinity;
1035 return (opStatus) (opOverflow | opInexact);
1038 /* Otherwise we become the largest finite number. */
1039 category = fcNormal;
1040 exponent = semantics->maxExponent;
1041 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1042 semantics->precision);
1047 /* Returns TRUE if, when truncating the current number, with BIT the
1048 new LSB, with the given lost fraction and rounding mode, the result
1049 would need to be rounded away from zero (i.e., by increasing the
1050 signficand). This routine must work for fcZero of both signs, and
1051 fcNormal numbers. */
1053 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1054 lostFraction lost_fraction,
1055 unsigned int bit) const
1057 /* NaNs and infinities should not have lost fractions. */
1058 assert(category == fcNormal || category == fcZero);
1060 /* Current callers never pass this so we don't handle it. */
1061 assert(lost_fraction != lfExactlyZero);
1063 switch(rounding_mode) {
1067 case rmNearestTiesToAway:
1068 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1070 case rmNearestTiesToEven:
1071 if(lost_fraction == lfMoreThanHalf)
1074 /* Our zeroes don't have a significand to test. */
1075 if(lost_fraction == lfExactlyHalf && category != fcZero)
1076 return APInt::tcExtractBit(significandParts(), bit);
1083 case rmTowardPositive:
1084 return sign == false;
1086 case rmTowardNegative:
1087 return sign == true;
1092 APFloat::normalize(roundingMode rounding_mode,
1093 lostFraction lost_fraction)
1095 unsigned int omsb; /* One, not zero, based MSB. */
1098 if(category != fcNormal)
1101 /* Before rounding normalize the exponent of fcNormal numbers. */
1102 omsb = significandMSB() + 1;
1105 /* OMSB is numbered from 1. We want to place it in the integer
1106 bit numbered PRECISON if possible, with a compensating change in
1108 exponentChange = omsb - semantics->precision;
1110 /* If the resulting exponent is too high, overflow according to
1111 the rounding mode. */
1112 if(exponent + exponentChange > semantics->maxExponent)
1113 return handleOverflow(rounding_mode);
1115 /* Subnormal numbers have exponent minExponent, and their MSB
1116 is forced based on that. */
1117 if(exponent + exponentChange < semantics->minExponent)
1118 exponentChange = semantics->minExponent - exponent;
1120 /* Shifting left is easy as we don't lose precision. */
1121 if(exponentChange < 0) {
1122 assert(lost_fraction == lfExactlyZero);
1124 shiftSignificandLeft(-exponentChange);
1129 if(exponentChange > 0) {
1132 /* Shift right and capture any new lost fraction. */
1133 lf = shiftSignificandRight(exponentChange);
1135 lost_fraction = combineLostFractions(lf, lost_fraction);
1137 /* Keep OMSB up-to-date. */
1138 if(omsb > (unsigned) exponentChange)
1139 omsb -= exponentChange;
1145 /* Now round the number according to rounding_mode given the lost
1148 /* As specified in IEEE 754, since we do not trap we do not report
1149 underflow for exact results. */
1150 if(lost_fraction == lfExactlyZero) {
1151 /* Canonicalize zeroes. */
1158 /* Increment the significand if we're rounding away from zero. */
1159 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1161 exponent = semantics->minExponent;
1163 incrementSignificand();
1164 omsb = significandMSB() + 1;
1166 /* Did the significand increment overflow? */
1167 if(omsb == (unsigned) semantics->precision + 1) {
1168 /* Renormalize by incrementing the exponent and shifting our
1169 significand right one. However if we already have the
1170 maximum exponent we overflow to infinity. */
1171 if(exponent == semantics->maxExponent) {
1172 category = fcInfinity;
1174 return (opStatus) (opOverflow | opInexact);
1177 shiftSignificandRight(1);
1183 /* The normal case - we were and are not denormal, and any
1184 significand increment above didn't overflow. */
1185 if(omsb == semantics->precision)
1188 /* We have a non-zero denormal. */
1189 assert(omsb < semantics->precision);
1191 /* Canonicalize zeroes. */
1195 /* The fcZero case is a denormal that underflowed to zero. */
1196 return (opStatus) (opUnderflow | opInexact);
1200 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1202 switch(convolve(category, rhs.category)) {
1206 case convolve(fcNaN, fcZero):
1207 case convolve(fcNaN, fcNormal):
1208 case convolve(fcNaN, fcInfinity):
1209 case convolve(fcNaN, fcNaN):
1210 case convolve(fcNormal, fcZero):
1211 case convolve(fcInfinity, fcNormal):
1212 case convolve(fcInfinity, fcZero):
1215 case convolve(fcZero, fcNaN):
1216 case convolve(fcNormal, fcNaN):
1217 case convolve(fcInfinity, fcNaN):
1219 copySignificand(rhs);
1222 case convolve(fcNormal, fcInfinity):
1223 case convolve(fcZero, fcInfinity):
1224 category = fcInfinity;
1225 sign = rhs.sign ^ subtract;
1228 case convolve(fcZero, fcNormal):
1230 sign = rhs.sign ^ subtract;
1233 case convolve(fcZero, fcZero):
1234 /* Sign depends on rounding mode; handled by caller. */
1237 case convolve(fcInfinity, fcInfinity):
1238 /* Differently signed infinities can only be validly
1240 if((sign ^ rhs.sign) != subtract) {
1247 case convolve(fcNormal, fcNormal):
1252 /* Add or subtract two normal numbers. */
1254 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1257 lostFraction lost_fraction;
1260 /* Determine if the operation on the absolute values is effectively
1261 an addition or subtraction. */
1262 subtract ^= (sign ^ rhs.sign) ? true : false;
1264 /* Are we bigger exponent-wise than the RHS? */
1265 bits = exponent - rhs.exponent;
1267 /* Subtraction is more subtle than one might naively expect. */
1269 APFloat temp_rhs(rhs);
1273 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1274 lost_fraction = lfExactlyZero;
1275 } else if (bits > 0) {
1276 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1277 shiftSignificandLeft(1);
1280 lost_fraction = shiftSignificandRight(-bits - 1);
1281 temp_rhs.shiftSignificandLeft(1);
1286 carry = temp_rhs.subtractSignificand
1287 (*this, lost_fraction != lfExactlyZero);
1288 copySignificand(temp_rhs);
1291 carry = subtractSignificand
1292 (temp_rhs, lost_fraction != lfExactlyZero);
1295 /* Invert the lost fraction - it was on the RHS and
1297 if(lost_fraction == lfLessThanHalf)
1298 lost_fraction = lfMoreThanHalf;
1299 else if(lost_fraction == lfMoreThanHalf)
1300 lost_fraction = lfLessThanHalf;
1302 /* The code above is intended to ensure that no borrow is
1307 APFloat temp_rhs(rhs);
1309 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1310 carry = addSignificand(temp_rhs);
1312 lost_fraction = shiftSignificandRight(-bits);
1313 carry = addSignificand(rhs);
1316 /* We have a guard bit; generating a carry cannot happen. */
1320 return lost_fraction;
1324 APFloat::multiplySpecials(const APFloat &rhs)
1326 switch(convolve(category, rhs.category)) {
1330 case convolve(fcNaN, fcZero):
1331 case convolve(fcNaN, fcNormal):
1332 case convolve(fcNaN, fcInfinity):
1333 case convolve(fcNaN, fcNaN):
1336 case convolve(fcZero, fcNaN):
1337 case convolve(fcNormal, fcNaN):
1338 case convolve(fcInfinity, fcNaN):
1340 copySignificand(rhs);
1343 case convolve(fcNormal, fcInfinity):
1344 case convolve(fcInfinity, fcNormal):
1345 case convolve(fcInfinity, fcInfinity):
1346 category = fcInfinity;
1349 case convolve(fcZero, fcNormal):
1350 case convolve(fcNormal, fcZero):
1351 case convolve(fcZero, fcZero):
1355 case convolve(fcZero, fcInfinity):
1356 case convolve(fcInfinity, fcZero):
1360 case convolve(fcNormal, fcNormal):
1366 APFloat::divideSpecials(const APFloat &rhs)
1368 switch(convolve(category, rhs.category)) {
1372 case convolve(fcNaN, fcZero):
1373 case convolve(fcNaN, fcNormal):
1374 case convolve(fcNaN, fcInfinity):
1375 case convolve(fcNaN, fcNaN):
1376 case convolve(fcInfinity, fcZero):
1377 case convolve(fcInfinity, fcNormal):
1378 case convolve(fcZero, fcInfinity):
1379 case convolve(fcZero, fcNormal):
1382 case convolve(fcZero, fcNaN):
1383 case convolve(fcNormal, fcNaN):
1384 case convolve(fcInfinity, fcNaN):
1386 copySignificand(rhs);
1389 case convolve(fcNormal, fcInfinity):
1393 case convolve(fcNormal, fcZero):
1394 category = fcInfinity;
1397 case convolve(fcInfinity, fcInfinity):
1398 case convolve(fcZero, fcZero):
1402 case convolve(fcNormal, fcNormal):
1409 APFloat::changeSign()
1411 /* Look mummy, this one's easy. */
1416 APFloat::clearSign()
1418 /* So is this one. */
1423 APFloat::copySign(const APFloat &rhs)
1429 /* Normalized addition or subtraction. */
1431 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1436 assertArithmeticOK(*semantics);
1438 fs = addOrSubtractSpecials(rhs, subtract);
1440 /* This return code means it was not a simple case. */
1441 if(fs == opDivByZero) {
1442 lostFraction lost_fraction;
1444 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1445 fs = normalize(rounding_mode, lost_fraction);
1447 /* Can only be zero if we lost no fraction. */
1448 assert(category != fcZero || lost_fraction == lfExactlyZero);
1451 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1452 positive zero unless rounding to minus infinity, except that
1453 adding two like-signed zeroes gives that zero. */
1454 if(category == fcZero) {
1455 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1456 sign = (rounding_mode == rmTowardNegative);
1462 /* Normalized addition. */
1464 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1466 return addOrSubtract(rhs, rounding_mode, false);
1469 /* Normalized subtraction. */
1471 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1473 return addOrSubtract(rhs, rounding_mode, true);
1476 /* Normalized multiply. */
1478 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1482 assertArithmeticOK(*semantics);
1484 fs = multiplySpecials(rhs);
1486 if(category == fcNormal) {
1487 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1488 fs = normalize(rounding_mode, lost_fraction);
1489 if(lost_fraction != lfExactlyZero)
1490 fs = (opStatus) (fs | opInexact);
1496 /* Normalized divide. */
1498 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1502 assertArithmeticOK(*semantics);
1504 fs = divideSpecials(rhs);
1506 if(category == fcNormal) {
1507 lostFraction lost_fraction = divideSignificand(rhs);
1508 fs = normalize(rounding_mode, lost_fraction);
1509 if(lost_fraction != lfExactlyZero)
1510 fs = (opStatus) (fs | opInexact);
1516 /* Normalized remainder. This is not currently doing TRT. */
1518 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1522 unsigned int origSign = sign;
1524 assertArithmeticOK(*semantics);
1525 fs = V.divide(rhs, rmNearestTiesToEven);
1526 if (fs == opDivByZero)
1529 int parts = partCount();
1530 integerPart *x = new integerPart[parts];
1531 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1532 rmNearestTiesToEven);
1533 if (fs==opInvalidOp)
1536 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1537 rmNearestTiesToEven);
1538 assert(fs==opOK); // should always work
1540 fs = V.multiply(rhs, rounding_mode);
1541 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1543 fs = subtract(V, rounding_mode);
1544 assert(fs==opOK || fs==opInexact); // likewise
1547 sign = origSign; // IEEE754 requires this
1552 /* Normalized fused-multiply-add. */
1554 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1555 const APFloat &addend,
1556 roundingMode rounding_mode)
1560 assertArithmeticOK(*semantics);
1562 /* Post-multiplication sign, before addition. */
1563 sign ^= multiplicand.sign;
1565 /* If and only if all arguments are normal do we need to do an
1566 extended-precision calculation. */
1567 if(category == fcNormal
1568 && multiplicand.category == fcNormal
1569 && addend.category == fcNormal) {
1570 lostFraction lost_fraction;
1572 lost_fraction = multiplySignificand(multiplicand, &addend);
1573 fs = normalize(rounding_mode, lost_fraction);
1574 if(lost_fraction != lfExactlyZero)
1575 fs = (opStatus) (fs | opInexact);
1577 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1578 positive zero unless rounding to minus infinity, except that
1579 adding two like-signed zeroes gives that zero. */
1580 if(category == fcZero && sign != addend.sign)
1581 sign = (rounding_mode == rmTowardNegative);
1583 fs = multiplySpecials(multiplicand);
1585 /* FS can only be opOK or opInvalidOp. There is no more work
1586 to do in the latter case. The IEEE-754R standard says it is
1587 implementation-defined in this case whether, if ADDEND is a
1588 quiet NaN, we raise invalid op; this implementation does so.
1590 If we need to do the addition we can do so with normal
1593 fs = addOrSubtract(addend, rounding_mode, false);
1599 /* Comparison requires normalized numbers. */
1601 APFloat::compare(const APFloat &rhs) const
1605 assertArithmeticOK(*semantics);
1606 assert(semantics == rhs.semantics);
1608 switch(convolve(category, rhs.category)) {
1612 case convolve(fcNaN, fcZero):
1613 case convolve(fcNaN, fcNormal):
1614 case convolve(fcNaN, fcInfinity):
1615 case convolve(fcNaN, fcNaN):
1616 case convolve(fcZero, fcNaN):
1617 case convolve(fcNormal, fcNaN):
1618 case convolve(fcInfinity, fcNaN):
1619 return cmpUnordered;
1621 case convolve(fcInfinity, fcNormal):
1622 case convolve(fcInfinity, fcZero):
1623 case convolve(fcNormal, fcZero):
1627 return cmpGreaterThan;
1629 case convolve(fcNormal, fcInfinity):
1630 case convolve(fcZero, fcInfinity):
1631 case convolve(fcZero, fcNormal):
1633 return cmpGreaterThan;
1637 case convolve(fcInfinity, fcInfinity):
1638 if(sign == rhs.sign)
1643 return cmpGreaterThan;
1645 case convolve(fcZero, fcZero):
1648 case convolve(fcNormal, fcNormal):
1652 /* Two normal numbers. Do they have the same sign? */
1653 if(sign != rhs.sign) {
1655 result = cmpLessThan;
1657 result = cmpGreaterThan;
1659 /* Compare absolute values; invert result if negative. */
1660 result = compareAbsoluteValue(rhs);
1663 if(result == cmpLessThan)
1664 result = cmpGreaterThan;
1665 else if(result == cmpGreaterThan)
1666 result = cmpLessThan;
1674 APFloat::convert(const fltSemantics &toSemantics,
1675 roundingMode rounding_mode)
1677 lostFraction lostFraction;
1678 unsigned int newPartCount, oldPartCount;
1681 assertArithmeticOK(*semantics);
1682 assertArithmeticOK(toSemantics);
1683 lostFraction = lfExactlyZero;
1684 newPartCount = partCountForBits(toSemantics.precision + 1);
1685 oldPartCount = partCount();
1687 /* Handle storage complications. If our new form is wider,
1688 re-allocate our bit pattern into wider storage. If it is
1689 narrower, we ignore the excess parts, but if narrowing to a
1690 single part we need to free the old storage.
1691 Be careful not to reference significandParts for zeroes
1692 and infinities, since it aborts. */
1693 if (newPartCount > oldPartCount) {
1694 integerPart *newParts;
1695 newParts = new integerPart[newPartCount];
1696 APInt::tcSet(newParts, 0, newPartCount);
1697 if (category==fcNormal || category==fcNaN)
1698 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1700 significand.parts = newParts;
1701 } else if (newPartCount < oldPartCount) {
1702 /* Capture any lost fraction through truncation of parts so we get
1703 correct rounding whilst normalizing. */
1704 if (category==fcNormal)
1705 lostFraction = lostFractionThroughTruncation
1706 (significandParts(), oldPartCount, toSemantics.precision);
1707 if (newPartCount == 1) {
1708 integerPart newPart = 0;
1709 if (category==fcNormal || category==fcNaN)
1710 newPart = significandParts()[0];
1712 significand.part = newPart;
1716 if(category == fcNormal) {
1717 /* Re-interpret our bit-pattern. */
1718 exponent += toSemantics.precision - semantics->precision;
1719 semantics = &toSemantics;
1720 fs = normalize(rounding_mode, lostFraction);
1721 } else if (category == fcNaN) {
1722 int shift = toSemantics.precision - semantics->precision;
1723 // Do this now so significandParts gets the right answer
1724 const fltSemantics *oldSemantics = semantics;
1725 semantics = &toSemantics;
1727 // No normalization here, just truncate
1729 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1730 else if (shift < 0) {
1731 unsigned ushift = -shift;
1732 // We mark this as Inexact if we are losing information. This happens
1733 // if are shifting out something other than 0s, or if the x87 long
1734 // double input did not have its integer bit set (pseudo-NaN), or if the
1735 // x87 long double input did not have its QNan bit set (because the x87
1736 // hardware sets this bit when converting a lower-precision NaN to
1737 // x87 long double).
1738 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1740 if (oldSemantics == &APFloat::x87DoubleExtended &&
1741 (!(*significandParts() & 0x8000000000000000ULL) ||
1742 !(*significandParts() & 0x4000000000000000ULL)))
1744 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1746 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1747 // does not give you back the same bits. This is dubious, and we
1748 // don't currently do it. You're really supposed to get
1749 // an invalid operation signal at runtime, but nobody does that.
1751 semantics = &toSemantics;
1758 /* Convert a floating point number to an integer according to the
1759 rounding mode. If the rounded integer value is out of range this
1760 returns an invalid operation exception and the contents of the
1761 destination parts are unspecified. If the rounded value is in
1762 range but the floating point number is not the exact integer, the C
1763 standard doesn't require an inexact exception to be raised. IEEE
1764 854 does require it so we do that.
1766 Note that for conversions to integer type the C standard requires
1767 round-to-zero to always be used. */
1769 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1771 roundingMode rounding_mode) const
1773 lostFraction lost_fraction;
1774 const integerPart *src;
1775 unsigned int dstPartsCount, truncatedBits;
1777 assertArithmeticOK(*semantics);
1779 /* Handle the three special cases first. */
1780 if(category == fcInfinity || category == fcNaN)
1783 dstPartsCount = partCountForBits(width);
1785 if(category == fcZero) {
1786 APInt::tcSet(parts, 0, dstPartsCount);
1787 // Negative zero can't be represented as an int.
1788 return sign ? opInexact : opOK;
1791 src = significandParts();
1793 /* Step 1: place our absolute value, with any fraction truncated, in
1796 /* Our absolute value is less than one; truncate everything. */
1797 APInt::tcSet(parts, 0, dstPartsCount);
1798 truncatedBits = semantics->precision;
1800 /* We want the most significant (exponent + 1) bits; the rest are
1802 unsigned int bits = exponent + 1U;
1804 /* Hopelessly large in magnitude? */
1808 if (bits < semantics->precision) {
1809 /* We truncate (semantics->precision - bits) bits. */
1810 truncatedBits = semantics->precision - bits;
1811 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1813 /* We want at least as many bits as are available. */
1814 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1815 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1820 /* Step 2: work out any lost fraction, and increment the absolute
1821 value if we would round away from zero. */
1822 if (truncatedBits) {
1823 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1825 if (lost_fraction != lfExactlyZero
1826 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1827 if (APInt::tcIncrement(parts, dstPartsCount))
1828 return opInvalidOp; /* Overflow. */
1831 lost_fraction = lfExactlyZero;
1834 /* Step 3: check if we fit in the destination. */
1835 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1839 /* Negative numbers cannot be represented as unsigned. */
1843 /* It takes omsb bits to represent the unsigned integer value.
1844 We lose a bit for the sign, but care is needed as the
1845 maximally negative integer is a special case. */
1846 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1849 /* This case can happen because of rounding. */
1854 APInt::tcNegate (parts, dstPartsCount);
1856 if (omsb >= width + !isSigned)
1860 if (lost_fraction == lfExactlyZero)
1866 /* Same as convertToSignExtendedInteger, except we provide
1867 deterministic values in case of an invalid operation exception,
1868 namely zero for NaNs and the minimal or maximal value respectively
1869 for underflow or overflow. */
1871 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1873 roundingMode rounding_mode) const
1877 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode);
1879 if (fs == opInvalidOp) {
1880 unsigned int bits, dstPartsCount;
1882 dstPartsCount = partCountForBits(width);
1884 if (category == fcNaN)
1889 bits = width - isSigned;
1891 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
1892 if (sign && isSigned)
1893 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
1899 /* Convert an unsigned integer SRC to a floating point number,
1900 rounding according to ROUNDING_MODE. The sign of the floating
1901 point number is not modified. */
1903 APFloat::convertFromUnsignedParts(const integerPart *src,
1904 unsigned int srcCount,
1905 roundingMode rounding_mode)
1907 unsigned int omsb, precision, dstCount;
1909 lostFraction lost_fraction;
1911 assertArithmeticOK(*semantics);
1912 category = fcNormal;
1913 omsb = APInt::tcMSB(src, srcCount) + 1;
1914 dst = significandParts();
1915 dstCount = partCount();
1916 precision = semantics->precision;
1918 /* We want the most significant PRECISON bits of SRC. There may not
1919 be that many; extract what we can. */
1920 if (precision <= omsb) {
1921 exponent = omsb - 1;
1922 lost_fraction = lostFractionThroughTruncation(src, srcCount,
1924 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
1926 exponent = precision - 1;
1927 lost_fraction = lfExactlyZero;
1928 APInt::tcExtract(dst, dstCount, src, omsb, 0);
1931 return normalize(rounding_mode, lost_fraction);
1935 APFloat::convertFromAPInt(const APInt &Val,
1937 roundingMode rounding_mode)
1939 unsigned int partCount = Val.getNumWords();
1943 if (isSigned && api.isNegative()) {
1948 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
1951 /* Convert a two's complement integer SRC to a floating point number,
1952 rounding according to ROUNDING_MODE. ISSIGNED is true if the
1953 integer is signed, in which case it must be sign-extended. */
1955 APFloat::convertFromSignExtendedInteger(const integerPart *src,
1956 unsigned int srcCount,
1958 roundingMode rounding_mode)
1962 assertArithmeticOK(*semantics);
1964 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
1967 /* If we're signed and negative negate a copy. */
1969 copy = new integerPart[srcCount];
1970 APInt::tcAssign(copy, src, srcCount);
1971 APInt::tcNegate(copy, srcCount);
1972 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
1976 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
1982 /* FIXME: should this just take a const APInt reference? */
1984 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
1985 unsigned int width, bool isSigned,
1986 roundingMode rounding_mode)
1988 unsigned int partCount = partCountForBits(width);
1989 APInt api = APInt(width, partCount, parts);
1992 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
1997 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2001 APFloat::convertFromHexadecimalString(const char *p,
2002 roundingMode rounding_mode)
2004 lostFraction lost_fraction;
2005 integerPart *significand;
2006 unsigned int bitPos, partsCount;
2007 const char *dot, *firstSignificantDigit;
2011 category = fcNormal;
2013 significand = significandParts();
2014 partsCount = partCount();
2015 bitPos = partsCount * integerPartWidth;
2017 /* Skip leading zeroes and any (hexa)decimal point. */
2018 p = skipLeadingZeroesAndAnyDot(p, &dot);
2019 firstSignificantDigit = p;
2022 integerPart hex_value;
2029 hex_value = hexDigitValue(*p);
2030 if(hex_value == -1U) {
2031 lost_fraction = lfExactlyZero;
2037 /* Store the number whilst 4-bit nibbles remain. */
2040 hex_value <<= bitPos % integerPartWidth;
2041 significand[bitPos / integerPartWidth] |= hex_value;
2043 lost_fraction = trailingHexadecimalFraction(p, hex_value);
2044 while(hexDigitValue(*p) != -1U)
2050 /* Hex floats require an exponent but not a hexadecimal point. */
2051 assert(*p == 'p' || *p == 'P');
2053 /* Ignore the exponent if we are zero. */
2054 if(p != firstSignificantDigit) {
2057 /* Implicit hexadecimal point? */
2061 /* Calculate the exponent adjustment implicit in the number of
2062 significant digits. */
2063 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2064 if(expAdjustment < 0)
2066 expAdjustment = expAdjustment * 4 - 1;
2068 /* Adjust for writing the significand starting at the most
2069 significant nibble. */
2070 expAdjustment += semantics->precision;
2071 expAdjustment -= partsCount * integerPartWidth;
2073 /* Adjust for the given exponent. */
2074 exponent = totalExponent(p, expAdjustment);
2077 return normalize(rounding_mode, lost_fraction);
2081 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2082 unsigned sigPartCount, int exp,
2083 roundingMode rounding_mode)
2085 unsigned int parts, pow5PartCount;
2086 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2087 integerPart pow5Parts[maxPowerOfFiveParts];
2090 isNearest = (rounding_mode == rmNearestTiesToEven
2091 || rounding_mode == rmNearestTiesToAway);
2093 parts = partCountForBits(semantics->precision + 11);
2095 /* Calculate pow(5, abs(exp)). */
2096 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2098 for (;; parts *= 2) {
2099 opStatus sigStatus, powStatus;
2100 unsigned int excessPrecision, truncatedBits;
2102 calcSemantics.precision = parts * integerPartWidth - 1;
2103 excessPrecision = calcSemantics.precision - semantics->precision;
2104 truncatedBits = excessPrecision;
2106 APFloat decSig(calcSemantics, fcZero, sign);
2107 APFloat pow5(calcSemantics, fcZero, false);
2109 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2110 rmNearestTiesToEven);
2111 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2112 rmNearestTiesToEven);
2113 /* Add exp, as 10^n = 5^n * 2^n. */
2114 decSig.exponent += exp;
2116 lostFraction calcLostFraction;
2117 integerPart HUerr, HUdistance;
2118 unsigned int powHUerr;
2121 /* multiplySignificand leaves the precision-th bit set to 1. */
2122 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2123 powHUerr = powStatus != opOK;
2125 calcLostFraction = decSig.divideSignificand(pow5);
2126 /* Denormal numbers have less precision. */
2127 if (decSig.exponent < semantics->minExponent) {
2128 excessPrecision += (semantics->minExponent - decSig.exponent);
2129 truncatedBits = excessPrecision;
2130 if (excessPrecision > calcSemantics.precision)
2131 excessPrecision = calcSemantics.precision;
2133 /* Extra half-ulp lost in reciprocal of exponent. */
2134 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2137 /* Both multiplySignificand and divideSignificand return the
2138 result with the integer bit set. */
2139 assert (APInt::tcExtractBit
2140 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2142 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2144 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2145 excessPrecision, isNearest);
2147 /* Are we guaranteed to round correctly if we truncate? */
2148 if (HUdistance >= HUerr) {
2149 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2150 calcSemantics.precision - excessPrecision,
2152 /* Take the exponent of decSig. If we tcExtract-ed less bits
2153 above we must adjust our exponent to compensate for the
2154 implicit right shift. */
2155 exponent = (decSig.exponent + semantics->precision
2156 - (calcSemantics.precision - excessPrecision));
2157 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2160 return normalize(rounding_mode, calcLostFraction);
2166 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
2171 /* Scan the text. */
2172 interpretDecimal(p, &D);
2174 /* Handle the quick cases. First the case of no significant digits,
2175 i.e. zero, and then exponents that are obviously too large or too
2176 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2177 definitely overflows if
2179 (exp - 1) * L >= maxExponent
2181 and definitely underflows to zero where
2183 (exp + 1) * L <= minExponent - precision
2185 With integer arithmetic the tightest bounds for L are
2187 93/28 < L < 196/59 [ numerator <= 256 ]
2188 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2191 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2194 } else if ((D.normalizedExponent + 1) * 28738
2195 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2196 /* Underflow to zero and round. */
2198 fs = normalize(rounding_mode, lfLessThanHalf);
2199 } else if ((D.normalizedExponent - 1) * 42039
2200 >= 12655 * semantics->maxExponent) {
2201 /* Overflow and round. */
2202 fs = handleOverflow(rounding_mode);
2204 integerPart *decSignificand;
2205 unsigned int partCount;
2207 /* A tight upper bound on number of bits required to hold an
2208 N-digit decimal integer is N * 196 / 59. Allocate enough space
2209 to hold the full significand, and an extra part required by
2211 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2212 partCount = partCountForBits(1 + 196 * partCount / 59);
2213 decSignificand = new integerPart[partCount + 1];
2216 /* Convert to binary efficiently - we do almost all multiplication
2217 in an integerPart. When this would overflow do we do a single
2218 bignum multiplication, and then revert again to multiplication
2219 in an integerPart. */
2221 integerPart decValue, val, multiplier;
2230 decValue = decDigitValue(*p++);
2232 val = val * 10 + decValue;
2233 /* The maximum number that can be multiplied by ten with any
2234 digit added without overflowing an integerPart. */
2235 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2237 /* Multiply out the current part. */
2238 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2239 partCount, partCount + 1, false);
2241 /* If we used another part (likely but not guaranteed), increase
2243 if (decSignificand[partCount])
2245 } while (p <= D.lastSigDigit);
2247 category = fcNormal;
2248 fs = roundSignificandWithExponent(decSignificand, partCount,
2249 D.exponent, rounding_mode);
2251 delete [] decSignificand;
2258 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2260 assertArithmeticOK(*semantics);
2262 /* Handle a leading minus sign. */
2268 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2269 return convertFromHexadecimalString(p + 2, rounding_mode);
2271 return convertFromDecimalString(p, rounding_mode);
2274 /* Write out a hexadecimal representation of the floating point value
2275 to DST, which must be of sufficient size, in the C99 form
2276 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2277 excluding the terminating NUL.
2279 If UPPERCASE, the output is in upper case, otherwise in lower case.
2281 HEXDIGITS digits appear altogether, rounding the value if
2282 necessary. If HEXDIGITS is 0, the minimal precision to display the
2283 number precisely is used instead. If nothing would appear after
2284 the decimal point it is suppressed.
2286 The decimal exponent is always printed and has at least one digit.
2287 Zero values display an exponent of zero. Infinities and NaNs
2288 appear as "infinity" or "nan" respectively.
2290 The above rules are as specified by C99. There is ambiguity about
2291 what the leading hexadecimal digit should be. This implementation
2292 uses whatever is necessary so that the exponent is displayed as
2293 stored. This implies the exponent will fall within the IEEE format
2294 range, and the leading hexadecimal digit will be 0 (for denormals),
2295 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2296 any other digits zero).
2299 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2300 bool upperCase, roundingMode rounding_mode) const
2304 assertArithmeticOK(*semantics);
2312 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2313 dst += sizeof infinityL - 1;
2317 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2318 dst += sizeof NaNU - 1;
2323 *dst++ = upperCase ? 'X': 'x';
2325 if (hexDigits > 1) {
2327 memset (dst, '0', hexDigits - 1);
2328 dst += hexDigits - 1;
2330 *dst++ = upperCase ? 'P': 'p';
2335 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2341 return static_cast<unsigned int>(dst - p);
2344 /* Does the hard work of outputting the correctly rounded hexadecimal
2345 form of a normal floating point number with the specified number of
2346 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2347 digits necessary to print the value precisely is output. */
2349 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2351 roundingMode rounding_mode) const
2353 unsigned int count, valueBits, shift, partsCount, outputDigits;
2354 const char *hexDigitChars;
2355 const integerPart *significand;
2360 *dst++ = upperCase ? 'X': 'x';
2363 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2365 significand = significandParts();
2366 partsCount = partCount();
2368 /* +3 because the first digit only uses the single integer bit, so
2369 we have 3 virtual zero most-significant-bits. */
2370 valueBits = semantics->precision + 3;
2371 shift = integerPartWidth - valueBits % integerPartWidth;
2373 /* The natural number of digits required ignoring trailing
2374 insignificant zeroes. */
2375 outputDigits = (valueBits - significandLSB () + 3) / 4;
2377 /* hexDigits of zero means use the required number for the
2378 precision. Otherwise, see if we are truncating. If we are,
2379 find out if we need to round away from zero. */
2381 if (hexDigits < outputDigits) {
2382 /* We are dropping non-zero bits, so need to check how to round.
2383 "bits" is the number of dropped bits. */
2385 lostFraction fraction;
2387 bits = valueBits - hexDigits * 4;
2388 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2389 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2391 outputDigits = hexDigits;
2394 /* Write the digits consecutively, and start writing in the location
2395 of the hexadecimal point. We move the most significant digit
2396 left and add the hexadecimal point later. */
2399 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2401 while (outputDigits && count) {
2404 /* Put the most significant integerPartWidth bits in "part". */
2405 if (--count == partsCount)
2406 part = 0; /* An imaginary higher zero part. */
2408 part = significand[count] << shift;
2411 part |= significand[count - 1] >> (integerPartWidth - shift);
2413 /* Convert as much of "part" to hexdigits as we can. */
2414 unsigned int curDigits = integerPartWidth / 4;
2416 if (curDigits > outputDigits)
2417 curDigits = outputDigits;
2418 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2419 outputDigits -= curDigits;
2425 /* Note that hexDigitChars has a trailing '0'. */
2428 *q = hexDigitChars[hexDigitValue (*q) + 1];
2429 } while (*q == '0');
2432 /* Add trailing zeroes. */
2433 memset (dst, '0', outputDigits);
2434 dst += outputDigits;
2437 /* Move the most significant digit to before the point, and if there
2438 is something after the decimal point add it. This must come
2439 after rounding above. */
2446 /* Finally output the exponent. */
2447 *dst++ = upperCase ? 'P': 'p';
2449 return writeSignedDecimal (dst, exponent);
2452 // For good performance it is desirable for different APFloats
2453 // to produce different integers.
2455 APFloat::getHashValue() const
2457 if (category==fcZero) return sign<<8 | semantics->precision ;
2458 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2459 else if (category==fcNaN) return 1<<10 | semantics->precision;
2461 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2462 const integerPart* p = significandParts();
2463 for (int i=partCount(); i>0; i--, p++)
2464 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2469 // Conversion from APFloat to/from host float/double. It may eventually be
2470 // possible to eliminate these and have everybody deal with APFloats, but that
2471 // will take a while. This approach will not easily extend to long double.
2472 // Current implementation requires integerPartWidth==64, which is correct at
2473 // the moment but could be made more general.
2475 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2476 // the actual IEEE respresentations. We compensate for that here.
2479 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2481 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2482 assert (partCount()==2);
2484 uint64_t myexponent, mysignificand;
2486 if (category==fcNormal) {
2487 myexponent = exponent+16383; //bias
2488 mysignificand = significandParts()[0];
2489 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2490 myexponent = 0; // denormal
2491 } else if (category==fcZero) {
2494 } else if (category==fcInfinity) {
2495 myexponent = 0x7fff;
2496 mysignificand = 0x8000000000000000ULL;
2498 assert(category == fcNaN && "Unknown category");
2499 myexponent = 0x7fff;
2500 mysignificand = significandParts()[0];
2504 words[0] = ((uint64_t)(sign & 1) << 63) |
2505 ((myexponent & 0x7fffLL) << 48) |
2506 ((mysignificand >>16) & 0xffffffffffffLL);
2507 words[1] = mysignificand & 0xffff;
2508 return APInt(80, 2, words);
2512 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2514 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2515 assert (partCount()==2);
2517 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2519 if (category==fcNormal) {
2520 myexponent = exponent + 1023; //bias
2521 myexponent2 = exponent2 + 1023;
2522 mysignificand = significandParts()[0];
2523 mysignificand2 = significandParts()[1];
2524 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2525 myexponent = 0; // denormal
2526 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2527 myexponent2 = 0; // denormal
2528 } else if (category==fcZero) {
2533 } else if (category==fcInfinity) {
2539 assert(category == fcNaN && "Unknown category");
2541 mysignificand = significandParts()[0];
2542 myexponent2 = exponent2;
2543 mysignificand2 = significandParts()[1];
2547 words[0] = ((uint64_t)(sign & 1) << 63) |
2548 ((myexponent & 0x7ff) << 52) |
2549 (mysignificand & 0xfffffffffffffLL);
2550 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2551 ((myexponent2 & 0x7ff) << 52) |
2552 (mysignificand2 & 0xfffffffffffffLL);
2553 return APInt(128, 2, words);
2557 APFloat::convertDoubleAPFloatToAPInt() const
2559 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2560 assert (partCount()==1);
2562 uint64_t myexponent, mysignificand;
2564 if (category==fcNormal) {
2565 myexponent = exponent+1023; //bias
2566 mysignificand = *significandParts();
2567 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2568 myexponent = 0; // denormal
2569 } else if (category==fcZero) {
2572 } else if (category==fcInfinity) {
2576 assert(category == fcNaN && "Unknown category!");
2578 mysignificand = *significandParts();
2581 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2582 ((myexponent & 0x7ff) << 52) |
2583 (mysignificand & 0xfffffffffffffLL))));
2587 APFloat::convertFloatAPFloatToAPInt() const
2589 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2590 assert (partCount()==1);
2592 uint32_t myexponent, mysignificand;
2594 if (category==fcNormal) {
2595 myexponent = exponent+127; //bias
2596 mysignificand = (uint32_t)*significandParts();
2597 if (myexponent == 1 && !(mysignificand & 0x800000))
2598 myexponent = 0; // denormal
2599 } else if (category==fcZero) {
2602 } else if (category==fcInfinity) {
2606 assert(category == fcNaN && "Unknown category!");
2608 mysignificand = (uint32_t)*significandParts();
2611 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2612 (mysignificand & 0x7fffff)));
2615 // This function creates an APInt that is just a bit map of the floating
2616 // point constant as it would appear in memory. It is not a conversion,
2617 // and treating the result as a normal integer is unlikely to be useful.
2620 APFloat::convertToAPInt() const
2622 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2623 return convertFloatAPFloatToAPInt();
2625 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2626 return convertDoubleAPFloatToAPInt();
2628 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2629 return convertPPCDoubleDoubleAPFloatToAPInt();
2631 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2633 return convertF80LongDoubleAPFloatToAPInt();
2637 APFloat::convertToFloat() const
2639 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2640 APInt api = convertToAPInt();
2641 return api.bitsToFloat();
2645 APFloat::convertToDouble() const
2647 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2648 APInt api = convertToAPInt();
2649 return api.bitsToDouble();
2652 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2653 /// does not support these bit patterns:
2654 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2655 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2656 /// exponent = 0, integer bit 1 ("pseudodenormal")
2657 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2658 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2660 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2662 assert(api.getBitWidth()==80);
2663 uint64_t i1 = api.getRawData()[0];
2664 uint64_t i2 = api.getRawData()[1];
2665 uint64_t myexponent = (i1 >> 48) & 0x7fff;
2666 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
2669 initialize(&APFloat::x87DoubleExtended);
2670 assert(partCount()==2);
2672 sign = static_cast<unsigned int>(i1>>63);
2673 if (myexponent==0 && mysignificand==0) {
2674 // exponent, significand meaningless
2676 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2677 // exponent, significand meaningless
2678 category = fcInfinity;
2679 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2680 // exponent meaningless
2682 significandParts()[0] = mysignificand;
2683 significandParts()[1] = 0;
2685 category = fcNormal;
2686 exponent = myexponent - 16383;
2687 significandParts()[0] = mysignificand;
2688 significandParts()[1] = 0;
2689 if (myexponent==0) // denormal
2695 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2697 assert(api.getBitWidth()==128);
2698 uint64_t i1 = api.getRawData()[0];
2699 uint64_t i2 = api.getRawData()[1];
2700 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2701 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2702 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2703 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2705 initialize(&APFloat::PPCDoubleDouble);
2706 assert(partCount()==2);
2708 sign = static_cast<unsigned int>(i1>>63);
2709 sign2 = static_cast<unsigned int>(i2>>63);
2710 if (myexponent==0 && mysignificand==0) {
2711 // exponent, significand meaningless
2712 // exponent2 and significand2 are required to be 0; we don't check
2714 } else if (myexponent==0x7ff && mysignificand==0) {
2715 // exponent, significand meaningless
2716 // exponent2 and significand2 are required to be 0; we don't check
2717 category = fcInfinity;
2718 } else if (myexponent==0x7ff && mysignificand!=0) {
2719 // exponent meaningless. So is the whole second word, but keep it
2722 exponent2 = myexponent2;
2723 significandParts()[0] = mysignificand;
2724 significandParts()[1] = mysignificand2;
2726 category = fcNormal;
2727 // Note there is no category2; the second word is treated as if it is
2728 // fcNormal, although it might be something else considered by itself.
2729 exponent = myexponent - 1023;
2730 exponent2 = myexponent2 - 1023;
2731 significandParts()[0] = mysignificand;
2732 significandParts()[1] = mysignificand2;
2733 if (myexponent==0) // denormal
2736 significandParts()[0] |= 0x10000000000000LL; // integer bit
2740 significandParts()[1] |= 0x10000000000000LL; // integer bit
2745 APFloat::initFromDoubleAPInt(const APInt &api)
2747 assert(api.getBitWidth()==64);
2748 uint64_t i = *api.getRawData();
2749 uint64_t myexponent = (i >> 52) & 0x7ff;
2750 uint64_t mysignificand = i & 0xfffffffffffffLL;
2752 initialize(&APFloat::IEEEdouble);
2753 assert(partCount()==1);
2755 sign = static_cast<unsigned int>(i>>63);
2756 if (myexponent==0 && mysignificand==0) {
2757 // exponent, significand meaningless
2759 } else if (myexponent==0x7ff && mysignificand==0) {
2760 // exponent, significand meaningless
2761 category = fcInfinity;
2762 } else if (myexponent==0x7ff && mysignificand!=0) {
2763 // exponent meaningless
2765 *significandParts() = mysignificand;
2767 category = fcNormal;
2768 exponent = myexponent - 1023;
2769 *significandParts() = mysignificand;
2770 if (myexponent==0) // denormal
2773 *significandParts() |= 0x10000000000000LL; // integer bit
2778 APFloat::initFromFloatAPInt(const APInt & api)
2780 assert(api.getBitWidth()==32);
2781 uint32_t i = (uint32_t)*api.getRawData();
2782 uint32_t myexponent = (i >> 23) & 0xff;
2783 uint32_t mysignificand = i & 0x7fffff;
2785 initialize(&APFloat::IEEEsingle);
2786 assert(partCount()==1);
2789 if (myexponent==0 && mysignificand==0) {
2790 // exponent, significand meaningless
2792 } else if (myexponent==0xff && mysignificand==0) {
2793 // exponent, significand meaningless
2794 category = fcInfinity;
2795 } else if (myexponent==0xff && mysignificand!=0) {
2796 // sign, exponent, significand meaningless
2798 *significandParts() = mysignificand;
2800 category = fcNormal;
2801 exponent = myexponent - 127; //bias
2802 *significandParts() = mysignificand;
2803 if (myexponent==0) // denormal
2806 *significandParts() |= 0x800000; // integer bit
2810 /// Treat api as containing the bits of a floating point number. Currently
2811 /// we infer the floating point type from the size of the APInt. The
2812 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2813 /// when the size is anything else).
2815 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2817 if (api.getBitWidth() == 32)
2818 return initFromFloatAPInt(api);
2819 else if (api.getBitWidth()==64)
2820 return initFromDoubleAPInt(api);
2821 else if (api.getBitWidth()==80)
2822 return initFromF80LongDoubleAPInt(api);
2823 else if (api.getBitWidth()==128 && !isIEEE)
2824 return initFromPPCDoubleDoubleAPInt(api);
2829 APFloat::APFloat(const APInt& api, bool isIEEE)
2831 initFromAPInt(api, isIEEE);
2834 APFloat::APFloat(float f)
2836 APInt api = APInt(32, 0);
2837 initFromAPInt(api.floatToBits(f));
2840 APFloat::APFloat(double d)
2842 APInt api = APInt(64, 0);
2843 initFromAPInt(api.doubleToBits(d));