1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/Support/MathExtras.h"
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
27 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
31 /* Represents floating point arithmetic semantics. */
33 /* The largest E such that 2^E is representable; this matches the
34 definition of IEEE 754. */
35 exponent_t maxExponent;
37 /* The smallest E such that 2^E is a normalized number; this
38 matches the definition of IEEE 754. */
39 exponent_t minExponent;
41 /* Number of bits in the significand. This includes the integer
43 unsigned int precision;
45 /* True if arithmetic is supported. */
46 unsigned int arithmeticOK;
49 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
50 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
51 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
52 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
53 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
55 // The PowerPC format consists of two doubles. It does not map cleanly
56 // onto the usual format above. For now only storage of constants of
57 // this type is supported, no arithmetic.
58 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
60 /* A tight upper bound on number of parts required to hold the value
63 power * 815 / (351 * integerPartWidth) + 1
65 However, whilst the result may require only this many parts,
66 because we are multiplying two values to get it, the
67 multiplication may require an extra part with the excess part
68 being zero (consider the trivial case of 1 * 1, tcFullMultiply
69 requires two parts to hold the single-part result). So we add an
70 extra one to guarantee enough space whilst multiplying. */
71 const unsigned int maxExponent = 16383;
72 const unsigned int maxPrecision = 113;
73 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
74 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
75 / (351 * integerPartWidth));
78 /* Put a bunch of private, handy routines in an anonymous namespace. */
81 static inline unsigned int
82 partCountForBits(unsigned int bits)
84 return ((bits) + integerPartWidth - 1) / integerPartWidth;
87 /* Returns 0U-9U. Return values >= 10U are not digits. */
88 static inline unsigned int
89 decDigitValue(unsigned int c)
95 hexDigitValue(unsigned int c)
115 assertArithmeticOK(const llvm::fltSemantics &semantics) {
116 assert(semantics.arithmeticOK
117 && "Compile-time arithmetic does not support these semantics");
120 /* Return the value of a decimal exponent of the form
123 If the exponent overflows, returns a large exponent with the
126 readExponent(const char *p)
129 unsigned int absExponent;
130 const unsigned int overlargeExponent = 24000; /* FIXME. */
132 isNegative = (*p == '-');
133 if (*p == '-' || *p == '+')
136 absExponent = decDigitValue(*p++);
137 assert (absExponent < 10U);
142 value = decDigitValue(*p);
147 value += absExponent * 10;
148 if (absExponent >= overlargeExponent) {
149 absExponent = overlargeExponent;
156 return -(int) absExponent;
158 return (int) absExponent;
161 /* This is ugly and needs cleaning up, but I don't immediately see
162 how whilst remaining safe. */
164 totalExponent(const char *p, int exponentAdjustment)
166 int unsignedExponent;
167 bool negative, overflow;
170 /* Move past the exponent letter and sign to the digits. */
172 negative = *p == '-';
173 if(*p == '-' || *p == '+')
176 unsignedExponent = 0;
181 value = decDigitValue(*p);
186 unsignedExponent = unsignedExponent * 10 + value;
187 if(unsignedExponent > 65535)
191 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
195 exponent = unsignedExponent;
197 exponent = -exponent;
198 exponent += exponentAdjustment;
199 if(exponent > 65535 || exponent < -65536)
204 exponent = negative ? -65536: 65535;
210 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
225 /* Given a normal decimal floating point number of the form
229 where the decimal point and exponent are optional, fill out the
230 structure D. Exponent is appropriate if the significand is
231 treated as an integer, and normalizedExponent if the significand
232 is taken to have the decimal point after a single leading
235 If the value is zero, V->firstSigDigit points to a non-digit, and
236 the return exponent is zero.
239 const char *firstSigDigit;
240 const char *lastSigDigit;
242 int normalizedExponent;
246 interpretDecimal(const char *p, decimalInfo *D)
250 p = skipLeadingZeroesAndAnyDot (p, &dot);
252 D->firstSigDigit = p;
254 D->normalizedExponent = 0;
261 if (decDigitValue(*p) >= 10U)
266 /* If number is all zerooes accept any exponent. */
267 if (p != D->firstSigDigit) {
268 if (*p == 'e' || *p == 'E')
269 D->exponent = readExponent(p + 1);
271 /* Implied decimal point? */
275 /* Drop insignificant trailing zeroes. */
282 /* Adjust the exponents for any decimal point. */
283 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
284 D->normalizedExponent = (D->exponent +
285 static_cast<exponent_t>((p - D->firstSigDigit)
286 - (dot > D->firstSigDigit && dot < p)));
292 /* Return the trailing fraction of a hexadecimal number.
293 DIGITVALUE is the first hex digit of the fraction, P points to
296 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
298 unsigned int hexDigit;
300 /* If the first trailing digit isn't 0 or 8 we can work out the
301 fraction immediately. */
303 return lfMoreThanHalf;
304 else if(digitValue < 8 && digitValue > 0)
305 return lfLessThanHalf;
307 /* Otherwise we need to find the first non-zero digit. */
311 hexDigit = hexDigitValue(*p);
313 /* If we ran off the end it is exactly zero or one-half, otherwise
316 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
318 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
321 /* Return the fraction lost were a bignum truncated losing the least
322 significant BITS bits. */
324 lostFractionThroughTruncation(const integerPart *parts,
325 unsigned int partCount,
330 lsb = APInt::tcLSB(parts, partCount);
332 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
334 return lfExactlyZero;
336 return lfExactlyHalf;
337 if(bits <= partCount * integerPartWidth
338 && APInt::tcExtractBit(parts, bits - 1))
339 return lfMoreThanHalf;
341 return lfLessThanHalf;
344 /* Shift DST right BITS bits noting lost fraction. */
346 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
348 lostFraction lost_fraction;
350 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
352 APInt::tcShiftRight(dst, parts, bits);
354 return lost_fraction;
357 /* Combine the effect of two lost fractions. */
359 combineLostFractions(lostFraction moreSignificant,
360 lostFraction lessSignificant)
362 if(lessSignificant != lfExactlyZero) {
363 if(moreSignificant == lfExactlyZero)
364 moreSignificant = lfLessThanHalf;
365 else if(moreSignificant == lfExactlyHalf)
366 moreSignificant = lfMoreThanHalf;
369 return moreSignificant;
372 /* The error from the true value, in half-ulps, on multiplying two
373 floating point numbers, which differ from the value they
374 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
375 than the returned value.
377 See "How to Read Floating Point Numbers Accurately" by William D
380 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
382 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
384 if (HUerr1 + HUerr2 == 0)
385 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
387 return inexactMultiply + 2 * (HUerr1 + HUerr2);
390 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
391 when the least significant BITS are truncated. BITS cannot be
394 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
396 unsigned int count, partBits;
397 integerPart part, boundary;
402 count = bits / integerPartWidth;
403 partBits = bits % integerPartWidth + 1;
405 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
408 boundary = (integerPart) 1 << (partBits - 1);
413 if (part - boundary <= boundary - part)
414 return part - boundary;
416 return boundary - part;
419 if (part == boundary) {
422 return ~(integerPart) 0; /* A lot. */
425 } else if (part == boundary - 1) {
428 return ~(integerPart) 0; /* A lot. */
433 return ~(integerPart) 0; /* A lot. */
436 /* Place pow(5, power) in DST, and return the number of parts used.
437 DST must be at least one part larger than size of the answer. */
439 powerOf5(integerPart *dst, unsigned int power)
441 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
443 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
444 static unsigned int partsCount[16] = { 1 };
446 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
449 assert(power <= maxExponent);
454 *p1 = firstEightPowers[power & 7];
460 for (unsigned int n = 0; power; power >>= 1, n++) {
465 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
467 pc = partsCount[n - 1];
468 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
470 if (pow5[pc - 1] == 0)
478 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
480 if (p2[result - 1] == 0)
483 /* Now result is in p1 with partsCount parts and p2 is scratch
485 tmp = p1, p1 = p2, p2 = tmp;
492 APInt::tcAssign(dst, p1, result);
497 /* Zero at the end to avoid modular arithmetic when adding one; used
498 when rounding up during hexadecimal output. */
499 static const char hexDigitsLower[] = "0123456789abcdef0";
500 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
501 static const char infinityL[] = "infinity";
502 static const char infinityU[] = "INFINITY";
503 static const char NaNL[] = "nan";
504 static const char NaNU[] = "NAN";
506 /* Write out an integerPart in hexadecimal, starting with the most
507 significant nibble. Write out exactly COUNT hexdigits, return
510 partAsHex (char *dst, integerPart part, unsigned int count,
511 const char *hexDigitChars)
513 unsigned int result = count;
515 assert (count != 0 && count <= integerPartWidth / 4);
517 part >>= (integerPartWidth - 4 * count);
519 dst[count] = hexDigitChars[part & 0xf];
526 /* Write out an unsigned decimal integer. */
528 writeUnsignedDecimal (char *dst, unsigned int n)
544 /* Write out a signed decimal integer. */
546 writeSignedDecimal (char *dst, int value)
550 dst = writeUnsignedDecimal(dst, -(unsigned) value);
552 dst = writeUnsignedDecimal(dst, value);
560 APFloat::initialize(const fltSemantics *ourSemantics)
564 semantics = ourSemantics;
567 significand.parts = new integerPart[count];
571 APFloat::freeSignificand()
574 delete [] significand.parts;
578 APFloat::assign(const APFloat &rhs)
580 assert(semantics == rhs.semantics);
583 category = rhs.category;
584 exponent = rhs.exponent;
586 exponent2 = rhs.exponent2;
587 if(category == fcNormal || category == fcNaN)
588 copySignificand(rhs);
592 APFloat::copySignificand(const APFloat &rhs)
594 assert(category == fcNormal || category == fcNaN);
595 assert(rhs.partCount() >= partCount());
597 APInt::tcAssign(significandParts(), rhs.significandParts(),
601 /* Make this number a NaN, with an arbitrary but deterministic value
602 for the significand. */
604 APFloat::makeNaN(void)
607 APInt::tcSet(significandParts(), ~0U, partCount());
611 APFloat::operator=(const APFloat &rhs)
614 if(semantics != rhs.semantics) {
616 initialize(rhs.semantics);
625 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
628 if (semantics != rhs.semantics ||
629 category != rhs.category ||
632 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
635 if (category==fcZero || category==fcInfinity)
637 else if (category==fcNormal && exponent!=rhs.exponent)
639 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
640 exponent2!=rhs.exponent2)
644 const integerPart* p=significandParts();
645 const integerPart* q=rhs.significandParts();
646 for (; i>0; i--, p++, q++) {
654 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
656 assertArithmeticOK(ourSemantics);
657 initialize(&ourSemantics);
660 exponent = ourSemantics.precision - 1;
661 significandParts()[0] = value;
662 normalize(rmNearestTiesToEven, lfExactlyZero);
665 APFloat::APFloat(const fltSemantics &ourSemantics,
666 fltCategory ourCategory, bool negative)
668 assertArithmeticOK(ourSemantics);
669 initialize(&ourSemantics);
670 category = ourCategory;
672 if(category == fcNormal)
674 else if (ourCategory == fcNaN)
678 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
680 assertArithmeticOK(ourSemantics);
681 initialize(&ourSemantics);
682 convertFromString(text, rmNearestTiesToEven);
685 APFloat::APFloat(const APFloat &rhs)
687 initialize(rhs.semantics);
696 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
697 void APFloat::Profile(FoldingSetNodeID& ID) const {
698 ID.Add(bitcastToAPInt());
702 APFloat::partCount() const
704 return partCountForBits(semantics->precision + 1);
708 APFloat::semanticsPrecision(const fltSemantics &semantics)
710 return semantics.precision;
714 APFloat::significandParts() const
716 return const_cast<APFloat *>(this)->significandParts();
720 APFloat::significandParts()
722 assert(category == fcNormal || category == fcNaN);
725 return significand.parts;
727 return &significand.part;
731 APFloat::zeroSignificand()
734 APInt::tcSet(significandParts(), 0, partCount());
737 /* Increment an fcNormal floating point number's significand. */
739 APFloat::incrementSignificand()
743 carry = APInt::tcIncrement(significandParts(), partCount());
745 /* Our callers should never cause us to overflow. */
749 /* Add the significand of the RHS. Returns the carry flag. */
751 APFloat::addSignificand(const APFloat &rhs)
755 parts = significandParts();
757 assert(semantics == rhs.semantics);
758 assert(exponent == rhs.exponent);
760 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
763 /* Subtract the significand of the RHS with a borrow flag. Returns
766 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
770 parts = significandParts();
772 assert(semantics == rhs.semantics);
773 assert(exponent == rhs.exponent);
775 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
779 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
780 on to the full-precision result of the multiplication. Returns the
783 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
785 unsigned int omsb; // One, not zero, based MSB.
786 unsigned int partsCount, newPartsCount, precision;
787 integerPart *lhsSignificand;
788 integerPart scratch[4];
789 integerPart *fullSignificand;
790 lostFraction lost_fraction;
793 assert(semantics == rhs.semantics);
795 precision = semantics->precision;
796 newPartsCount = partCountForBits(precision * 2);
798 if(newPartsCount > 4)
799 fullSignificand = new integerPart[newPartsCount];
801 fullSignificand = scratch;
803 lhsSignificand = significandParts();
804 partsCount = partCount();
806 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
807 rhs.significandParts(), partsCount, partsCount);
809 lost_fraction = lfExactlyZero;
810 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
811 exponent += rhs.exponent;
814 Significand savedSignificand = significand;
815 const fltSemantics *savedSemantics = semantics;
816 fltSemantics extendedSemantics;
818 unsigned int extendedPrecision;
820 /* Normalize our MSB. */
821 extendedPrecision = precision + precision - 1;
822 if(omsb != extendedPrecision)
824 APInt::tcShiftLeft(fullSignificand, newPartsCount,
825 extendedPrecision - omsb);
826 exponent -= extendedPrecision - omsb;
829 /* Create new semantics. */
830 extendedSemantics = *semantics;
831 extendedSemantics.precision = extendedPrecision;
833 if(newPartsCount == 1)
834 significand.part = fullSignificand[0];
836 significand.parts = fullSignificand;
837 semantics = &extendedSemantics;
839 APFloat extendedAddend(*addend);
840 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
841 assert(status == opOK);
842 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
844 /* Restore our state. */
845 if(newPartsCount == 1)
846 fullSignificand[0] = significand.part;
847 significand = savedSignificand;
848 semantics = savedSemantics;
850 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
853 exponent -= (precision - 1);
855 if(omsb > precision) {
856 unsigned int bits, significantParts;
859 bits = omsb - precision;
860 significantParts = partCountForBits(omsb);
861 lf = shiftRight(fullSignificand, significantParts, bits);
862 lost_fraction = combineLostFractions(lf, lost_fraction);
866 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
868 if(newPartsCount > 4)
869 delete [] fullSignificand;
871 return lost_fraction;
874 /* Multiply the significands of LHS and RHS to DST. */
876 APFloat::divideSignificand(const APFloat &rhs)
878 unsigned int bit, i, partsCount;
879 const integerPart *rhsSignificand;
880 integerPart *lhsSignificand, *dividend, *divisor;
881 integerPart scratch[4];
882 lostFraction lost_fraction;
884 assert(semantics == rhs.semantics);
886 lhsSignificand = significandParts();
887 rhsSignificand = rhs.significandParts();
888 partsCount = partCount();
891 dividend = new integerPart[partsCount * 2];
895 divisor = dividend + partsCount;
897 /* Copy the dividend and divisor as they will be modified in-place. */
898 for(i = 0; i < partsCount; i++) {
899 dividend[i] = lhsSignificand[i];
900 divisor[i] = rhsSignificand[i];
901 lhsSignificand[i] = 0;
904 exponent -= rhs.exponent;
906 unsigned int precision = semantics->precision;
908 /* Normalize the divisor. */
909 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
912 APInt::tcShiftLeft(divisor, partsCount, bit);
915 /* Normalize the dividend. */
916 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
919 APInt::tcShiftLeft(dividend, partsCount, bit);
922 /* Ensure the dividend >= divisor initially for the loop below.
923 Incidentally, this means that the division loop below is
924 guaranteed to set the integer bit to one. */
925 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
927 APInt::tcShiftLeft(dividend, partsCount, 1);
928 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
932 for(bit = precision; bit; bit -= 1) {
933 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
934 APInt::tcSubtract(dividend, divisor, 0, partsCount);
935 APInt::tcSetBit(lhsSignificand, bit - 1);
938 APInt::tcShiftLeft(dividend, partsCount, 1);
941 /* Figure out the lost fraction. */
942 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
945 lost_fraction = lfMoreThanHalf;
947 lost_fraction = lfExactlyHalf;
948 else if(APInt::tcIsZero(dividend, partsCount))
949 lost_fraction = lfExactlyZero;
951 lost_fraction = lfLessThanHalf;
956 return lost_fraction;
960 APFloat::significandMSB() const
962 return APInt::tcMSB(significandParts(), partCount());
966 APFloat::significandLSB() const
968 return APInt::tcLSB(significandParts(), partCount());
971 /* Note that a zero result is NOT normalized to fcZero. */
973 APFloat::shiftSignificandRight(unsigned int bits)
975 /* Our exponent should not overflow. */
976 assert((exponent_t) (exponent + bits) >= exponent);
980 return shiftRight(significandParts(), partCount(), bits);
983 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
985 APFloat::shiftSignificandLeft(unsigned int bits)
987 assert(bits < semantics->precision);
990 unsigned int partsCount = partCount();
992 APInt::tcShiftLeft(significandParts(), partsCount, bits);
995 assert(!APInt::tcIsZero(significandParts(), partsCount));
1000 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1004 assert(semantics == rhs.semantics);
1005 assert(category == fcNormal);
1006 assert(rhs.category == fcNormal);
1008 compare = exponent - rhs.exponent;
1010 /* If exponents are equal, do an unsigned bignum comparison of the
1013 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1017 return cmpGreaterThan;
1018 else if(compare < 0)
1024 /* Handle overflow. Sign is preserved. We either become infinity or
1025 the largest finite number. */
1027 APFloat::handleOverflow(roundingMode rounding_mode)
1030 if(rounding_mode == rmNearestTiesToEven
1031 || rounding_mode == rmNearestTiesToAway
1032 || (rounding_mode == rmTowardPositive && !sign)
1033 || (rounding_mode == rmTowardNegative && sign))
1035 category = fcInfinity;
1036 return (opStatus) (opOverflow | opInexact);
1039 /* Otherwise we become the largest finite number. */
1040 category = fcNormal;
1041 exponent = semantics->maxExponent;
1042 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1043 semantics->precision);
1048 /* Returns TRUE if, when truncating the current number, with BIT the
1049 new LSB, with the given lost fraction and rounding mode, the result
1050 would need to be rounded away from zero (i.e., by increasing the
1051 signficand). This routine must work for fcZero of both signs, and
1052 fcNormal numbers. */
1054 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1055 lostFraction lost_fraction,
1056 unsigned int bit) const
1058 /* NaNs and infinities should not have lost fractions. */
1059 assert(category == fcNormal || category == fcZero);
1061 /* Current callers never pass this so we don't handle it. */
1062 assert(lost_fraction != lfExactlyZero);
1064 switch(rounding_mode) {
1068 case rmNearestTiesToAway:
1069 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1071 case rmNearestTiesToEven:
1072 if(lost_fraction == lfMoreThanHalf)
1075 /* Our zeroes don't have a significand to test. */
1076 if(lost_fraction == lfExactlyHalf && category != fcZero)
1077 return APInt::tcExtractBit(significandParts(), bit);
1084 case rmTowardPositive:
1085 return sign == false;
1087 case rmTowardNegative:
1088 return sign == true;
1093 APFloat::normalize(roundingMode rounding_mode,
1094 lostFraction lost_fraction)
1096 unsigned int omsb; /* One, not zero, based MSB. */
1099 if(category != fcNormal)
1102 /* Before rounding normalize the exponent of fcNormal numbers. */
1103 omsb = significandMSB() + 1;
1106 /* OMSB is numbered from 1. We want to place it in the integer
1107 bit numbered PRECISON if possible, with a compensating change in
1109 exponentChange = omsb - semantics->precision;
1111 /* If the resulting exponent is too high, overflow according to
1112 the rounding mode. */
1113 if(exponent + exponentChange > semantics->maxExponent)
1114 return handleOverflow(rounding_mode);
1116 /* Subnormal numbers have exponent minExponent, and their MSB
1117 is forced based on that. */
1118 if(exponent + exponentChange < semantics->minExponent)
1119 exponentChange = semantics->minExponent - exponent;
1121 /* Shifting left is easy as we don't lose precision. */
1122 if(exponentChange < 0) {
1123 assert(lost_fraction == lfExactlyZero);
1125 shiftSignificandLeft(-exponentChange);
1130 if(exponentChange > 0) {
1133 /* Shift right and capture any new lost fraction. */
1134 lf = shiftSignificandRight(exponentChange);
1136 lost_fraction = combineLostFractions(lf, lost_fraction);
1138 /* Keep OMSB up-to-date. */
1139 if(omsb > (unsigned) exponentChange)
1140 omsb -= exponentChange;
1146 /* Now round the number according to rounding_mode given the lost
1149 /* As specified in IEEE 754, since we do not trap we do not report
1150 underflow for exact results. */
1151 if(lost_fraction == lfExactlyZero) {
1152 /* Canonicalize zeroes. */
1159 /* Increment the significand if we're rounding away from zero. */
1160 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1162 exponent = semantics->minExponent;
1164 incrementSignificand();
1165 omsb = significandMSB() + 1;
1167 /* Did the significand increment overflow? */
1168 if(omsb == (unsigned) semantics->precision + 1) {
1169 /* Renormalize by incrementing the exponent and shifting our
1170 significand right one. However if we already have the
1171 maximum exponent we overflow to infinity. */
1172 if(exponent == semantics->maxExponent) {
1173 category = fcInfinity;
1175 return (opStatus) (opOverflow | opInexact);
1178 shiftSignificandRight(1);
1184 /* The normal case - we were and are not denormal, and any
1185 significand increment above didn't overflow. */
1186 if(omsb == semantics->precision)
1189 /* We have a non-zero denormal. */
1190 assert(omsb < semantics->precision);
1192 /* Canonicalize zeroes. */
1196 /* The fcZero case is a denormal that underflowed to zero. */
1197 return (opStatus) (opUnderflow | opInexact);
1201 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1203 switch(convolve(category, rhs.category)) {
1207 case convolve(fcNaN, fcZero):
1208 case convolve(fcNaN, fcNormal):
1209 case convolve(fcNaN, fcInfinity):
1210 case convolve(fcNaN, fcNaN):
1211 case convolve(fcNormal, fcZero):
1212 case convolve(fcInfinity, fcNormal):
1213 case convolve(fcInfinity, fcZero):
1216 case convolve(fcZero, fcNaN):
1217 case convolve(fcNormal, fcNaN):
1218 case convolve(fcInfinity, fcNaN):
1220 copySignificand(rhs);
1223 case convolve(fcNormal, fcInfinity):
1224 case convolve(fcZero, fcInfinity):
1225 category = fcInfinity;
1226 sign = rhs.sign ^ subtract;
1229 case convolve(fcZero, fcNormal):
1231 sign = rhs.sign ^ subtract;
1234 case convolve(fcZero, fcZero):
1235 /* Sign depends on rounding mode; handled by caller. */
1238 case convolve(fcInfinity, fcInfinity):
1239 /* Differently signed infinities can only be validly
1241 if((sign ^ rhs.sign) != subtract) {
1248 case convolve(fcNormal, fcNormal):
1253 /* Add or subtract two normal numbers. */
1255 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1258 lostFraction lost_fraction;
1261 /* Determine if the operation on the absolute values is effectively
1262 an addition or subtraction. */
1263 subtract ^= (sign ^ rhs.sign) ? true : false;
1265 /* Are we bigger exponent-wise than the RHS? */
1266 bits = exponent - rhs.exponent;
1268 /* Subtraction is more subtle than one might naively expect. */
1270 APFloat temp_rhs(rhs);
1274 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1275 lost_fraction = lfExactlyZero;
1276 } else if (bits > 0) {
1277 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1278 shiftSignificandLeft(1);
1281 lost_fraction = shiftSignificandRight(-bits - 1);
1282 temp_rhs.shiftSignificandLeft(1);
1287 carry = temp_rhs.subtractSignificand
1288 (*this, lost_fraction != lfExactlyZero);
1289 copySignificand(temp_rhs);
1292 carry = subtractSignificand
1293 (temp_rhs, lost_fraction != lfExactlyZero);
1296 /* Invert the lost fraction - it was on the RHS and
1298 if(lost_fraction == lfLessThanHalf)
1299 lost_fraction = lfMoreThanHalf;
1300 else if(lost_fraction == lfMoreThanHalf)
1301 lost_fraction = lfLessThanHalf;
1303 /* The code above is intended to ensure that no borrow is
1308 APFloat temp_rhs(rhs);
1310 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1311 carry = addSignificand(temp_rhs);
1313 lost_fraction = shiftSignificandRight(-bits);
1314 carry = addSignificand(rhs);
1317 /* We have a guard bit; generating a carry cannot happen. */
1321 return lost_fraction;
1325 APFloat::multiplySpecials(const APFloat &rhs)
1327 switch(convolve(category, rhs.category)) {
1331 case convolve(fcNaN, fcZero):
1332 case convolve(fcNaN, fcNormal):
1333 case convolve(fcNaN, fcInfinity):
1334 case convolve(fcNaN, fcNaN):
1337 case convolve(fcZero, fcNaN):
1338 case convolve(fcNormal, fcNaN):
1339 case convolve(fcInfinity, fcNaN):
1341 copySignificand(rhs);
1344 case convolve(fcNormal, fcInfinity):
1345 case convolve(fcInfinity, fcNormal):
1346 case convolve(fcInfinity, fcInfinity):
1347 category = fcInfinity;
1350 case convolve(fcZero, fcNormal):
1351 case convolve(fcNormal, fcZero):
1352 case convolve(fcZero, fcZero):
1356 case convolve(fcZero, fcInfinity):
1357 case convolve(fcInfinity, fcZero):
1361 case convolve(fcNormal, fcNormal):
1367 APFloat::divideSpecials(const APFloat &rhs)
1369 switch(convolve(category, rhs.category)) {
1373 case convolve(fcNaN, fcZero):
1374 case convolve(fcNaN, fcNormal):
1375 case convolve(fcNaN, fcInfinity):
1376 case convolve(fcNaN, fcNaN):
1377 case convolve(fcInfinity, fcZero):
1378 case convolve(fcInfinity, fcNormal):
1379 case convolve(fcZero, fcInfinity):
1380 case convolve(fcZero, fcNormal):
1383 case convolve(fcZero, fcNaN):
1384 case convolve(fcNormal, fcNaN):
1385 case convolve(fcInfinity, fcNaN):
1387 copySignificand(rhs);
1390 case convolve(fcNormal, fcInfinity):
1394 case convolve(fcNormal, fcZero):
1395 category = fcInfinity;
1398 case convolve(fcInfinity, fcInfinity):
1399 case convolve(fcZero, fcZero):
1403 case convolve(fcNormal, fcNormal):
1409 APFloat::modSpecials(const APFloat &rhs)
1411 switch(convolve(category, rhs.category)) {
1415 case convolve(fcNaN, fcZero):
1416 case convolve(fcNaN, fcNormal):
1417 case convolve(fcNaN, fcInfinity):
1418 case convolve(fcNaN, fcNaN):
1419 case convolve(fcZero, fcInfinity):
1420 case convolve(fcZero, fcNormal):
1421 case convolve(fcNormal, fcInfinity):
1424 case convolve(fcZero, fcNaN):
1425 case convolve(fcNormal, fcNaN):
1426 case convolve(fcInfinity, fcNaN):
1428 copySignificand(rhs);
1431 case convolve(fcNormal, fcZero):
1432 case convolve(fcInfinity, fcZero):
1433 case convolve(fcInfinity, fcNormal):
1434 case convolve(fcInfinity, fcInfinity):
1435 case convolve(fcZero, fcZero):
1439 case convolve(fcNormal, fcNormal):
1446 APFloat::changeSign()
1448 /* Look mummy, this one's easy. */
1453 APFloat::clearSign()
1455 /* So is this one. */
1460 APFloat::copySign(const APFloat &rhs)
1466 /* Normalized addition or subtraction. */
1468 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1473 assertArithmeticOK(*semantics);
1475 fs = addOrSubtractSpecials(rhs, subtract);
1477 /* This return code means it was not a simple case. */
1478 if(fs == opDivByZero) {
1479 lostFraction lost_fraction;
1481 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1482 fs = normalize(rounding_mode, lost_fraction);
1484 /* Can only be zero if we lost no fraction. */
1485 assert(category != fcZero || lost_fraction == lfExactlyZero);
1488 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1489 positive zero unless rounding to minus infinity, except that
1490 adding two like-signed zeroes gives that zero. */
1491 if(category == fcZero) {
1492 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1493 sign = (rounding_mode == rmTowardNegative);
1499 /* Normalized addition. */
1501 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1503 return addOrSubtract(rhs, rounding_mode, false);
1506 /* Normalized subtraction. */
1508 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1510 return addOrSubtract(rhs, rounding_mode, true);
1513 /* Normalized multiply. */
1515 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1519 assertArithmeticOK(*semantics);
1521 fs = multiplySpecials(rhs);
1523 if(category == fcNormal) {
1524 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1525 fs = normalize(rounding_mode, lost_fraction);
1526 if(lost_fraction != lfExactlyZero)
1527 fs = (opStatus) (fs | opInexact);
1533 /* Normalized divide. */
1535 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1539 assertArithmeticOK(*semantics);
1541 fs = divideSpecials(rhs);
1543 if(category == fcNormal) {
1544 lostFraction lost_fraction = divideSignificand(rhs);
1545 fs = normalize(rounding_mode, lost_fraction);
1546 if(lost_fraction != lfExactlyZero)
1547 fs = (opStatus) (fs | opInexact);
1553 /* Normalized remainder. This is not currently correct in all cases. */
1555 APFloat::remainder(const APFloat &rhs)
1559 unsigned int origSign = sign;
1561 assertArithmeticOK(*semantics);
1562 fs = V.divide(rhs, rmNearestTiesToEven);
1563 if (fs == opDivByZero)
1566 int parts = partCount();
1567 integerPart *x = new integerPart[parts];
1569 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1570 rmNearestTiesToEven, &ignored);
1571 if (fs==opInvalidOp)
1574 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1575 rmNearestTiesToEven);
1576 assert(fs==opOK); // should always work
1578 fs = V.multiply(rhs, rmNearestTiesToEven);
1579 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1581 fs = subtract(V, rmNearestTiesToEven);
1582 assert(fs==opOK || fs==opInexact); // likewise
1585 sign = origSign; // IEEE754 requires this
1590 /* Normalized llvm frem (C fmod).
1591 This is not currently correct in all cases. */
1593 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1596 assertArithmeticOK(*semantics);
1597 fs = modSpecials(rhs);
1599 if (category == fcNormal && rhs.category == fcNormal) {
1601 unsigned int origSign = sign;
1603 fs = V.divide(rhs, rmNearestTiesToEven);
1604 if (fs == opDivByZero)
1607 int parts = partCount();
1608 integerPart *x = new integerPart[parts];
1610 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1611 rmTowardZero, &ignored);
1612 if (fs==opInvalidOp)
1615 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1616 rmNearestTiesToEven);
1617 assert(fs==opOK); // should always work
1619 fs = V.multiply(rhs, rounding_mode);
1620 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1622 fs = subtract(V, rounding_mode);
1623 assert(fs==opOK || fs==opInexact); // likewise
1626 sign = origSign; // IEEE754 requires this
1632 /* Normalized fused-multiply-add. */
1634 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1635 const APFloat &addend,
1636 roundingMode rounding_mode)
1640 assertArithmeticOK(*semantics);
1642 /* Post-multiplication sign, before addition. */
1643 sign ^= multiplicand.sign;
1645 /* If and only if all arguments are normal do we need to do an
1646 extended-precision calculation. */
1647 if(category == fcNormal
1648 && multiplicand.category == fcNormal
1649 && addend.category == fcNormal) {
1650 lostFraction lost_fraction;
1652 lost_fraction = multiplySignificand(multiplicand, &addend);
1653 fs = normalize(rounding_mode, lost_fraction);
1654 if(lost_fraction != lfExactlyZero)
1655 fs = (opStatus) (fs | opInexact);
1657 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1658 positive zero unless rounding to minus infinity, except that
1659 adding two like-signed zeroes gives that zero. */
1660 if(category == fcZero && sign != addend.sign)
1661 sign = (rounding_mode == rmTowardNegative);
1663 fs = multiplySpecials(multiplicand);
1665 /* FS can only be opOK or opInvalidOp. There is no more work
1666 to do in the latter case. The IEEE-754R standard says it is
1667 implementation-defined in this case whether, if ADDEND is a
1668 quiet NaN, we raise invalid op; this implementation does so.
1670 If we need to do the addition we can do so with normal
1673 fs = addOrSubtract(addend, rounding_mode, false);
1679 /* Comparison requires normalized numbers. */
1681 APFloat::compare(const APFloat &rhs) const
1685 assertArithmeticOK(*semantics);
1686 assert(semantics == rhs.semantics);
1688 switch(convolve(category, rhs.category)) {
1692 case convolve(fcNaN, fcZero):
1693 case convolve(fcNaN, fcNormal):
1694 case convolve(fcNaN, fcInfinity):
1695 case convolve(fcNaN, fcNaN):
1696 case convolve(fcZero, fcNaN):
1697 case convolve(fcNormal, fcNaN):
1698 case convolve(fcInfinity, fcNaN):
1699 return cmpUnordered;
1701 case convolve(fcInfinity, fcNormal):
1702 case convolve(fcInfinity, fcZero):
1703 case convolve(fcNormal, fcZero):
1707 return cmpGreaterThan;
1709 case convolve(fcNormal, fcInfinity):
1710 case convolve(fcZero, fcInfinity):
1711 case convolve(fcZero, fcNormal):
1713 return cmpGreaterThan;
1717 case convolve(fcInfinity, fcInfinity):
1718 if(sign == rhs.sign)
1723 return cmpGreaterThan;
1725 case convolve(fcZero, fcZero):
1728 case convolve(fcNormal, fcNormal):
1732 /* Two normal numbers. Do they have the same sign? */
1733 if(sign != rhs.sign) {
1735 result = cmpLessThan;
1737 result = cmpGreaterThan;
1739 /* Compare absolute values; invert result if negative. */
1740 result = compareAbsoluteValue(rhs);
1743 if(result == cmpLessThan)
1744 result = cmpGreaterThan;
1745 else if(result == cmpGreaterThan)
1746 result = cmpLessThan;
1753 /// APFloat::convert - convert a value of one floating point type to another.
1754 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1755 /// records whether the transformation lost information, i.e. whether
1756 /// converting the result back to the original type will produce the
1757 /// original value (this is almost the same as return value==fsOK, but there
1758 /// are edge cases where this is not so).
1761 APFloat::convert(const fltSemantics &toSemantics,
1762 roundingMode rounding_mode, bool *losesInfo)
1764 lostFraction lostFraction;
1765 unsigned int newPartCount, oldPartCount;
1768 assertArithmeticOK(*semantics);
1769 assertArithmeticOK(toSemantics);
1770 lostFraction = lfExactlyZero;
1771 newPartCount = partCountForBits(toSemantics.precision + 1);
1772 oldPartCount = partCount();
1774 /* Handle storage complications. If our new form is wider,
1775 re-allocate our bit pattern into wider storage. If it is
1776 narrower, we ignore the excess parts, but if narrowing to a
1777 single part we need to free the old storage.
1778 Be careful not to reference significandParts for zeroes
1779 and infinities, since it aborts. */
1780 if (newPartCount > oldPartCount) {
1781 integerPart *newParts;
1782 newParts = new integerPart[newPartCount];
1783 APInt::tcSet(newParts, 0, newPartCount);
1784 if (category==fcNormal || category==fcNaN)
1785 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1787 significand.parts = newParts;
1788 } else if (newPartCount < oldPartCount) {
1789 /* Capture any lost fraction through truncation of parts so we get
1790 correct rounding whilst normalizing. */
1791 if (category==fcNormal)
1792 lostFraction = lostFractionThroughTruncation
1793 (significandParts(), oldPartCount, toSemantics.precision);
1794 if (newPartCount == 1) {
1795 integerPart newPart = 0;
1796 if (category==fcNormal || category==fcNaN)
1797 newPart = significandParts()[0];
1799 significand.part = newPart;
1803 if(category == fcNormal) {
1804 /* Re-interpret our bit-pattern. */
1805 exponent += toSemantics.precision - semantics->precision;
1806 semantics = &toSemantics;
1807 fs = normalize(rounding_mode, lostFraction);
1808 *losesInfo = (fs != opOK);
1809 } else if (category == fcNaN) {
1810 int shift = toSemantics.precision - semantics->precision;
1811 // Do this now so significandParts gets the right answer
1812 const fltSemantics *oldSemantics = semantics;
1813 semantics = &toSemantics;
1815 // No normalization here, just truncate
1817 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1818 else if (shift < 0) {
1819 unsigned ushift = -shift;
1820 // Figure out if we are losing information. This happens
1821 // if are shifting out something other than 0s, or if the x87 long
1822 // double input did not have its integer bit set (pseudo-NaN), or if the
1823 // x87 long double input did not have its QNan bit set (because the x87
1824 // hardware sets this bit when converting a lower-precision NaN to
1825 // x87 long double).
1826 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1828 if (oldSemantics == &APFloat::x87DoubleExtended &&
1829 (!(*significandParts() & 0x8000000000000000ULL) ||
1830 !(*significandParts() & 0x4000000000000000ULL)))
1832 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1834 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1835 // does not give you back the same bits. This is dubious, and we
1836 // don't currently do it. You're really supposed to get
1837 // an invalid operation signal at runtime, but nobody does that.
1840 semantics = &toSemantics;
1848 /* Convert a floating point number to an integer according to the
1849 rounding mode. If the rounded integer value is out of range this
1850 returns an invalid operation exception and the contents of the
1851 destination parts are unspecified. If the rounded value is in
1852 range but the floating point number is not the exact integer, the C
1853 standard doesn't require an inexact exception to be raised. IEEE
1854 854 does require it so we do that.
1856 Note that for conversions to integer type the C standard requires
1857 round-to-zero to always be used. */
1859 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1861 roundingMode rounding_mode,
1862 bool *isExact) const
1864 lostFraction lost_fraction;
1865 const integerPart *src;
1866 unsigned int dstPartsCount, truncatedBits;
1868 assertArithmeticOK(*semantics);
1872 /* Handle the three special cases first. */
1873 if(category == fcInfinity || category == fcNaN)
1876 dstPartsCount = partCountForBits(width);
1878 if(category == fcZero) {
1879 APInt::tcSet(parts, 0, dstPartsCount);
1880 // Negative zero can't be represented as an int.
1885 src = significandParts();
1887 /* Step 1: place our absolute value, with any fraction truncated, in
1890 /* Our absolute value is less than one; truncate everything. */
1891 APInt::tcSet(parts, 0, dstPartsCount);
1892 /* For exponent -1 the integer bit represents .5, look at that.
1893 For smaller exponents leftmost truncated bit is 0. */
1894 truncatedBits = semantics->precision -1U - exponent;
1896 /* We want the most significant (exponent + 1) bits; the rest are
1898 unsigned int bits = exponent + 1U;
1900 /* Hopelessly large in magnitude? */
1904 if (bits < semantics->precision) {
1905 /* We truncate (semantics->precision - bits) bits. */
1906 truncatedBits = semantics->precision - bits;
1907 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1909 /* We want at least as many bits as are available. */
1910 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1911 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1916 /* Step 2: work out any lost fraction, and increment the absolute
1917 value if we would round away from zero. */
1918 if (truncatedBits) {
1919 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1921 if (lost_fraction != lfExactlyZero
1922 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1923 if (APInt::tcIncrement(parts, dstPartsCount))
1924 return opInvalidOp; /* Overflow. */
1927 lost_fraction = lfExactlyZero;
1930 /* Step 3: check if we fit in the destination. */
1931 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1935 /* Negative numbers cannot be represented as unsigned. */
1939 /* It takes omsb bits to represent the unsigned integer value.
1940 We lose a bit for the sign, but care is needed as the
1941 maximally negative integer is a special case. */
1942 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1945 /* This case can happen because of rounding. */
1950 APInt::tcNegate (parts, dstPartsCount);
1952 if (omsb >= width + !isSigned)
1956 if (lost_fraction == lfExactlyZero) {
1963 /* Same as convertToSignExtendedInteger, except we provide
1964 deterministic values in case of an invalid operation exception,
1965 namely zero for NaNs and the minimal or maximal value respectively
1966 for underflow or overflow.
1967 The *isExact output tells whether the result is exact, in the sense
1968 that converting it back to the original floating point type produces
1969 the original value. This is almost equivalent to result==opOK,
1970 except for negative zeroes.
1973 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1975 roundingMode rounding_mode, bool *isExact) const
1979 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
1982 if (fs == opInvalidOp) {
1983 unsigned int bits, dstPartsCount;
1985 dstPartsCount = partCountForBits(width);
1987 if (category == fcNaN)
1992 bits = width - isSigned;
1994 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
1995 if (sign && isSigned)
1996 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2002 /* Convert an unsigned integer SRC to a floating point number,
2003 rounding according to ROUNDING_MODE. The sign of the floating
2004 point number is not modified. */
2006 APFloat::convertFromUnsignedParts(const integerPart *src,
2007 unsigned int srcCount,
2008 roundingMode rounding_mode)
2010 unsigned int omsb, precision, dstCount;
2012 lostFraction lost_fraction;
2014 assertArithmeticOK(*semantics);
2015 category = fcNormal;
2016 omsb = APInt::tcMSB(src, srcCount) + 1;
2017 dst = significandParts();
2018 dstCount = partCount();
2019 precision = semantics->precision;
2021 /* We want the most significant PRECISON bits of SRC. There may not
2022 be that many; extract what we can. */
2023 if (precision <= omsb) {
2024 exponent = omsb - 1;
2025 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2027 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2029 exponent = precision - 1;
2030 lost_fraction = lfExactlyZero;
2031 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2034 return normalize(rounding_mode, lost_fraction);
2038 APFloat::convertFromAPInt(const APInt &Val,
2040 roundingMode rounding_mode)
2042 unsigned int partCount = Val.getNumWords();
2046 if (isSigned && api.isNegative()) {
2051 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2054 /* Convert a two's complement integer SRC to a floating point number,
2055 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2056 integer is signed, in which case it must be sign-extended. */
2058 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2059 unsigned int srcCount,
2061 roundingMode rounding_mode)
2065 assertArithmeticOK(*semantics);
2067 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2070 /* If we're signed and negative negate a copy. */
2072 copy = new integerPart[srcCount];
2073 APInt::tcAssign(copy, src, srcCount);
2074 APInt::tcNegate(copy, srcCount);
2075 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2079 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2085 /* FIXME: should this just take a const APInt reference? */
2087 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2088 unsigned int width, bool isSigned,
2089 roundingMode rounding_mode)
2091 unsigned int partCount = partCountForBits(width);
2092 APInt api = APInt(width, partCount, parts);
2095 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
2100 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2104 APFloat::convertFromHexadecimalString(const char *p,
2105 roundingMode rounding_mode)
2107 lostFraction lost_fraction;
2108 integerPart *significand;
2109 unsigned int bitPos, partsCount;
2110 const char *dot, *firstSignificantDigit;
2114 category = fcNormal;
2116 significand = significandParts();
2117 partsCount = partCount();
2118 bitPos = partsCount * integerPartWidth;
2120 /* Skip leading zeroes and any (hexa)decimal point. */
2121 p = skipLeadingZeroesAndAnyDot(p, &dot);
2122 firstSignificantDigit = p;
2125 integerPart hex_value;
2132 hex_value = hexDigitValue(*p);
2133 if(hex_value == -1U) {
2134 lost_fraction = lfExactlyZero;
2140 /* Store the number whilst 4-bit nibbles remain. */
2143 hex_value <<= bitPos % integerPartWidth;
2144 significand[bitPos / integerPartWidth] |= hex_value;
2146 lost_fraction = trailingHexadecimalFraction(p, hex_value);
2147 while(hexDigitValue(*p) != -1U)
2153 /* Hex floats require an exponent but not a hexadecimal point. */
2154 assert(*p == 'p' || *p == 'P');
2156 /* Ignore the exponent if we are zero. */
2157 if(p != firstSignificantDigit) {
2160 /* Implicit hexadecimal point? */
2164 /* Calculate the exponent adjustment implicit in the number of
2165 significant digits. */
2166 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2167 if(expAdjustment < 0)
2169 expAdjustment = expAdjustment * 4 - 1;
2171 /* Adjust for writing the significand starting at the most
2172 significant nibble. */
2173 expAdjustment += semantics->precision;
2174 expAdjustment -= partsCount * integerPartWidth;
2176 /* Adjust for the given exponent. */
2177 exponent = totalExponent(p, expAdjustment);
2180 return normalize(rounding_mode, lost_fraction);
2184 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2185 unsigned sigPartCount, int exp,
2186 roundingMode rounding_mode)
2188 unsigned int parts, pow5PartCount;
2189 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2190 integerPart pow5Parts[maxPowerOfFiveParts];
2193 isNearest = (rounding_mode == rmNearestTiesToEven
2194 || rounding_mode == rmNearestTiesToAway);
2196 parts = partCountForBits(semantics->precision + 11);
2198 /* Calculate pow(5, abs(exp)). */
2199 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2201 for (;; parts *= 2) {
2202 opStatus sigStatus, powStatus;
2203 unsigned int excessPrecision, truncatedBits;
2205 calcSemantics.precision = parts * integerPartWidth - 1;
2206 excessPrecision = calcSemantics.precision - semantics->precision;
2207 truncatedBits = excessPrecision;
2209 APFloat decSig(calcSemantics, fcZero, sign);
2210 APFloat pow5(calcSemantics, fcZero, false);
2212 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2213 rmNearestTiesToEven);
2214 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2215 rmNearestTiesToEven);
2216 /* Add exp, as 10^n = 5^n * 2^n. */
2217 decSig.exponent += exp;
2219 lostFraction calcLostFraction;
2220 integerPart HUerr, HUdistance;
2221 unsigned int powHUerr;
2224 /* multiplySignificand leaves the precision-th bit set to 1. */
2225 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2226 powHUerr = powStatus != opOK;
2228 calcLostFraction = decSig.divideSignificand(pow5);
2229 /* Denormal numbers have less precision. */
2230 if (decSig.exponent < semantics->minExponent) {
2231 excessPrecision += (semantics->minExponent - decSig.exponent);
2232 truncatedBits = excessPrecision;
2233 if (excessPrecision > calcSemantics.precision)
2234 excessPrecision = calcSemantics.precision;
2236 /* Extra half-ulp lost in reciprocal of exponent. */
2237 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2240 /* Both multiplySignificand and divideSignificand return the
2241 result with the integer bit set. */
2242 assert (APInt::tcExtractBit
2243 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2245 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2247 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2248 excessPrecision, isNearest);
2250 /* Are we guaranteed to round correctly if we truncate? */
2251 if (HUdistance >= HUerr) {
2252 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2253 calcSemantics.precision - excessPrecision,
2255 /* Take the exponent of decSig. If we tcExtract-ed less bits
2256 above we must adjust our exponent to compensate for the
2257 implicit right shift. */
2258 exponent = (decSig.exponent + semantics->precision
2259 - (calcSemantics.precision - excessPrecision));
2260 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2263 return normalize(rounding_mode, calcLostFraction);
2269 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
2274 /* Scan the text. */
2275 interpretDecimal(p, &D);
2277 /* Handle the quick cases. First the case of no significant digits,
2278 i.e. zero, and then exponents that are obviously too large or too
2279 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2280 definitely overflows if
2282 (exp - 1) * L >= maxExponent
2284 and definitely underflows to zero where
2286 (exp + 1) * L <= minExponent - precision
2288 With integer arithmetic the tightest bounds for L are
2290 93/28 < L < 196/59 [ numerator <= 256 ]
2291 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2294 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2297 } else if ((D.normalizedExponent + 1) * 28738
2298 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2299 /* Underflow to zero and round. */
2301 fs = normalize(rounding_mode, lfLessThanHalf);
2302 } else if ((D.normalizedExponent - 1) * 42039
2303 >= 12655 * semantics->maxExponent) {
2304 /* Overflow and round. */
2305 fs = handleOverflow(rounding_mode);
2307 integerPart *decSignificand;
2308 unsigned int partCount;
2310 /* A tight upper bound on number of bits required to hold an
2311 N-digit decimal integer is N * 196 / 59. Allocate enough space
2312 to hold the full significand, and an extra part required by
2314 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2315 partCount = partCountForBits(1 + 196 * partCount / 59);
2316 decSignificand = new integerPart[partCount + 1];
2319 /* Convert to binary efficiently - we do almost all multiplication
2320 in an integerPart. When this would overflow do we do a single
2321 bignum multiplication, and then revert again to multiplication
2322 in an integerPart. */
2324 integerPart decValue, val, multiplier;
2333 decValue = decDigitValue(*p++);
2335 val = val * 10 + decValue;
2336 /* The maximum number that can be multiplied by ten with any
2337 digit added without overflowing an integerPart. */
2338 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2340 /* Multiply out the current part. */
2341 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2342 partCount, partCount + 1, false);
2344 /* If we used another part (likely but not guaranteed), increase
2346 if (decSignificand[partCount])
2348 } while (p <= D.lastSigDigit);
2350 category = fcNormal;
2351 fs = roundSignificandWithExponent(decSignificand, partCount,
2352 D.exponent, rounding_mode);
2354 delete [] decSignificand;
2361 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2363 assertArithmeticOK(*semantics);
2365 /* Handle a leading minus sign. */
2371 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2372 return convertFromHexadecimalString(p + 2, rounding_mode);
2374 return convertFromDecimalString(p, rounding_mode);
2377 /* Write out a hexadecimal representation of the floating point value
2378 to DST, which must be of sufficient size, in the C99 form
2379 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2380 excluding the terminating NUL.
2382 If UPPERCASE, the output is in upper case, otherwise in lower case.
2384 HEXDIGITS digits appear altogether, rounding the value if
2385 necessary. If HEXDIGITS is 0, the minimal precision to display the
2386 number precisely is used instead. If nothing would appear after
2387 the decimal point it is suppressed.
2389 The decimal exponent is always printed and has at least one digit.
2390 Zero values display an exponent of zero. Infinities and NaNs
2391 appear as "infinity" or "nan" respectively.
2393 The above rules are as specified by C99. There is ambiguity about
2394 what the leading hexadecimal digit should be. This implementation
2395 uses whatever is necessary so that the exponent is displayed as
2396 stored. This implies the exponent will fall within the IEEE format
2397 range, and the leading hexadecimal digit will be 0 (for denormals),
2398 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2399 any other digits zero).
2402 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2403 bool upperCase, roundingMode rounding_mode) const
2407 assertArithmeticOK(*semantics);
2415 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2416 dst += sizeof infinityL - 1;
2420 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2421 dst += sizeof NaNU - 1;
2426 *dst++ = upperCase ? 'X': 'x';
2428 if (hexDigits > 1) {
2430 memset (dst, '0', hexDigits - 1);
2431 dst += hexDigits - 1;
2433 *dst++ = upperCase ? 'P': 'p';
2438 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2444 return static_cast<unsigned int>(dst - p);
2447 /* Does the hard work of outputting the correctly rounded hexadecimal
2448 form of a normal floating point number with the specified number of
2449 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2450 digits necessary to print the value precisely is output. */
2452 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2454 roundingMode rounding_mode) const
2456 unsigned int count, valueBits, shift, partsCount, outputDigits;
2457 const char *hexDigitChars;
2458 const integerPart *significand;
2463 *dst++ = upperCase ? 'X': 'x';
2466 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2468 significand = significandParts();
2469 partsCount = partCount();
2471 /* +3 because the first digit only uses the single integer bit, so
2472 we have 3 virtual zero most-significant-bits. */
2473 valueBits = semantics->precision + 3;
2474 shift = integerPartWidth - valueBits % integerPartWidth;
2476 /* The natural number of digits required ignoring trailing
2477 insignificant zeroes. */
2478 outputDigits = (valueBits - significandLSB () + 3) / 4;
2480 /* hexDigits of zero means use the required number for the
2481 precision. Otherwise, see if we are truncating. If we are,
2482 find out if we need to round away from zero. */
2484 if (hexDigits < outputDigits) {
2485 /* We are dropping non-zero bits, so need to check how to round.
2486 "bits" is the number of dropped bits. */
2488 lostFraction fraction;
2490 bits = valueBits - hexDigits * 4;
2491 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2492 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2494 outputDigits = hexDigits;
2497 /* Write the digits consecutively, and start writing in the location
2498 of the hexadecimal point. We move the most significant digit
2499 left and add the hexadecimal point later. */
2502 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2504 while (outputDigits && count) {
2507 /* Put the most significant integerPartWidth bits in "part". */
2508 if (--count == partsCount)
2509 part = 0; /* An imaginary higher zero part. */
2511 part = significand[count] << shift;
2514 part |= significand[count - 1] >> (integerPartWidth - shift);
2516 /* Convert as much of "part" to hexdigits as we can. */
2517 unsigned int curDigits = integerPartWidth / 4;
2519 if (curDigits > outputDigits)
2520 curDigits = outputDigits;
2521 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2522 outputDigits -= curDigits;
2528 /* Note that hexDigitChars has a trailing '0'. */
2531 *q = hexDigitChars[hexDigitValue (*q) + 1];
2532 } while (*q == '0');
2535 /* Add trailing zeroes. */
2536 memset (dst, '0', outputDigits);
2537 dst += outputDigits;
2540 /* Move the most significant digit to before the point, and if there
2541 is something after the decimal point add it. This must come
2542 after rounding above. */
2549 /* Finally output the exponent. */
2550 *dst++ = upperCase ? 'P': 'p';
2552 return writeSignedDecimal (dst, exponent);
2555 // For good performance it is desirable for different APFloats
2556 // to produce different integers.
2558 APFloat::getHashValue() const
2560 if (category==fcZero) return sign<<8 | semantics->precision ;
2561 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2562 else if (category==fcNaN) return 1<<10 | semantics->precision;
2564 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2565 const integerPart* p = significandParts();
2566 for (int i=partCount(); i>0; i--, p++)
2567 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2572 // Conversion from APFloat to/from host float/double. It may eventually be
2573 // possible to eliminate these and have everybody deal with APFloats, but that
2574 // will take a while. This approach will not easily extend to long double.
2575 // Current implementation requires integerPartWidth==64, which is correct at
2576 // the moment but could be made more general.
2578 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2579 // the actual IEEE respresentations. We compensate for that here.
2582 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2584 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2585 assert (partCount()==2);
2587 uint64_t myexponent, mysignificand;
2589 if (category==fcNormal) {
2590 myexponent = exponent+16383; //bias
2591 mysignificand = significandParts()[0];
2592 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2593 myexponent = 0; // denormal
2594 } else if (category==fcZero) {
2597 } else if (category==fcInfinity) {
2598 myexponent = 0x7fff;
2599 mysignificand = 0x8000000000000000ULL;
2601 assert(category == fcNaN && "Unknown category");
2602 myexponent = 0x7fff;
2603 mysignificand = significandParts()[0];
2607 words[0] = ((uint64_t)(sign & 1) << 63) |
2608 ((myexponent & 0x7fffLL) << 48) |
2609 ((mysignificand >>16) & 0xffffffffffffLL);
2610 words[1] = mysignificand & 0xffff;
2611 return APInt(80, 2, words);
2615 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2617 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2618 assert (partCount()==2);
2620 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2622 if (category==fcNormal) {
2623 myexponent = exponent + 1023; //bias
2624 myexponent2 = exponent2 + 1023;
2625 mysignificand = significandParts()[0];
2626 mysignificand2 = significandParts()[1];
2627 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2628 myexponent = 0; // denormal
2629 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2630 myexponent2 = 0; // denormal
2631 } else if (category==fcZero) {
2636 } else if (category==fcInfinity) {
2642 assert(category == fcNaN && "Unknown category");
2644 mysignificand = significandParts()[0];
2645 myexponent2 = exponent2;
2646 mysignificand2 = significandParts()[1];
2650 words[0] = ((uint64_t)(sign & 1) << 63) |
2651 ((myexponent & 0x7ff) << 52) |
2652 (mysignificand & 0xfffffffffffffLL);
2653 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2654 ((myexponent2 & 0x7ff) << 52) |
2655 (mysignificand2 & 0xfffffffffffffLL);
2656 return APInt(128, 2, words);
2660 APFloat::convertDoubleAPFloatToAPInt() const
2662 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2663 assert (partCount()==1);
2665 uint64_t myexponent, mysignificand;
2667 if (category==fcNormal) {
2668 myexponent = exponent+1023; //bias
2669 mysignificand = *significandParts();
2670 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2671 myexponent = 0; // denormal
2672 } else if (category==fcZero) {
2675 } else if (category==fcInfinity) {
2679 assert(category == fcNaN && "Unknown category!");
2681 mysignificand = *significandParts();
2684 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2685 ((myexponent & 0x7ff) << 52) |
2686 (mysignificand & 0xfffffffffffffLL))));
2690 APFloat::convertFloatAPFloatToAPInt() const
2692 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2693 assert (partCount()==1);
2695 uint32_t myexponent, mysignificand;
2697 if (category==fcNormal) {
2698 myexponent = exponent+127; //bias
2699 mysignificand = (uint32_t)*significandParts();
2700 if (myexponent == 1 && !(mysignificand & 0x800000))
2701 myexponent = 0; // denormal
2702 } else if (category==fcZero) {
2705 } else if (category==fcInfinity) {
2709 assert(category == fcNaN && "Unknown category!");
2711 mysignificand = (uint32_t)*significandParts();
2714 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2715 (mysignificand & 0x7fffff)));
2718 // This function creates an APInt that is just a bit map of the floating
2719 // point constant as it would appear in memory. It is not a conversion,
2720 // and treating the result as a normal integer is unlikely to be useful.
2723 APFloat::bitcastToAPInt() const
2725 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2726 return convertFloatAPFloatToAPInt();
2728 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2729 return convertDoubleAPFloatToAPInt();
2731 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2732 return convertPPCDoubleDoubleAPFloatToAPInt();
2734 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2736 return convertF80LongDoubleAPFloatToAPInt();
2740 APFloat::convertToFloat() const
2742 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2743 APInt api = bitcastToAPInt();
2744 return api.bitsToFloat();
2748 APFloat::convertToDouble() const
2750 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2751 APInt api = bitcastToAPInt();
2752 return api.bitsToDouble();
2755 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2756 /// does not support these bit patterns:
2757 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2758 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2759 /// exponent = 0, integer bit 1 ("pseudodenormal")
2760 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2761 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2763 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2765 assert(api.getBitWidth()==80);
2766 uint64_t i1 = api.getRawData()[0];
2767 uint64_t i2 = api.getRawData()[1];
2768 uint64_t myexponent = (i1 >> 48) & 0x7fff;
2769 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
2772 initialize(&APFloat::x87DoubleExtended);
2773 assert(partCount()==2);
2775 sign = static_cast<unsigned int>(i1>>63);
2776 if (myexponent==0 && mysignificand==0) {
2777 // exponent, significand meaningless
2779 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2780 // exponent, significand meaningless
2781 category = fcInfinity;
2782 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2783 // exponent meaningless
2785 significandParts()[0] = mysignificand;
2786 significandParts()[1] = 0;
2788 category = fcNormal;
2789 exponent = myexponent - 16383;
2790 significandParts()[0] = mysignificand;
2791 significandParts()[1] = 0;
2792 if (myexponent==0) // denormal
2798 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2800 assert(api.getBitWidth()==128);
2801 uint64_t i1 = api.getRawData()[0];
2802 uint64_t i2 = api.getRawData()[1];
2803 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2804 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2805 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2806 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2808 initialize(&APFloat::PPCDoubleDouble);
2809 assert(partCount()==2);
2811 sign = static_cast<unsigned int>(i1>>63);
2812 sign2 = static_cast<unsigned int>(i2>>63);
2813 if (myexponent==0 && mysignificand==0) {
2814 // exponent, significand meaningless
2815 // exponent2 and significand2 are required to be 0; we don't check
2817 } else if (myexponent==0x7ff && mysignificand==0) {
2818 // exponent, significand meaningless
2819 // exponent2 and significand2 are required to be 0; we don't check
2820 category = fcInfinity;
2821 } else if (myexponent==0x7ff && mysignificand!=0) {
2822 // exponent meaningless. So is the whole second word, but keep it
2825 exponent2 = myexponent2;
2826 significandParts()[0] = mysignificand;
2827 significandParts()[1] = mysignificand2;
2829 category = fcNormal;
2830 // Note there is no category2; the second word is treated as if it is
2831 // fcNormal, although it might be something else considered by itself.
2832 exponent = myexponent - 1023;
2833 exponent2 = myexponent2 - 1023;
2834 significandParts()[0] = mysignificand;
2835 significandParts()[1] = mysignificand2;
2836 if (myexponent==0) // denormal
2839 significandParts()[0] |= 0x10000000000000LL; // integer bit
2843 significandParts()[1] |= 0x10000000000000LL; // integer bit
2848 APFloat::initFromDoubleAPInt(const APInt &api)
2850 assert(api.getBitWidth()==64);
2851 uint64_t i = *api.getRawData();
2852 uint64_t myexponent = (i >> 52) & 0x7ff;
2853 uint64_t mysignificand = i & 0xfffffffffffffLL;
2855 initialize(&APFloat::IEEEdouble);
2856 assert(partCount()==1);
2858 sign = static_cast<unsigned int>(i>>63);
2859 if (myexponent==0 && mysignificand==0) {
2860 // exponent, significand meaningless
2862 } else if (myexponent==0x7ff && mysignificand==0) {
2863 // exponent, significand meaningless
2864 category = fcInfinity;
2865 } else if (myexponent==0x7ff && mysignificand!=0) {
2866 // exponent meaningless
2868 *significandParts() = mysignificand;
2870 category = fcNormal;
2871 exponent = myexponent - 1023;
2872 *significandParts() = mysignificand;
2873 if (myexponent==0) // denormal
2876 *significandParts() |= 0x10000000000000LL; // integer bit
2881 APFloat::initFromFloatAPInt(const APInt & api)
2883 assert(api.getBitWidth()==32);
2884 uint32_t i = (uint32_t)*api.getRawData();
2885 uint32_t myexponent = (i >> 23) & 0xff;
2886 uint32_t mysignificand = i & 0x7fffff;
2888 initialize(&APFloat::IEEEsingle);
2889 assert(partCount()==1);
2892 if (myexponent==0 && mysignificand==0) {
2893 // exponent, significand meaningless
2895 } else if (myexponent==0xff && mysignificand==0) {
2896 // exponent, significand meaningless
2897 category = fcInfinity;
2898 } else if (myexponent==0xff && mysignificand!=0) {
2899 // sign, exponent, significand meaningless
2901 *significandParts() = mysignificand;
2903 category = fcNormal;
2904 exponent = myexponent - 127; //bias
2905 *significandParts() = mysignificand;
2906 if (myexponent==0) // denormal
2909 *significandParts() |= 0x800000; // integer bit
2913 /// Treat api as containing the bits of a floating point number. Currently
2914 /// we infer the floating point type from the size of the APInt. The
2915 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2916 /// when the size is anything else).
2918 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2920 if (api.getBitWidth() == 32)
2921 return initFromFloatAPInt(api);
2922 else if (api.getBitWidth()==64)
2923 return initFromDoubleAPInt(api);
2924 else if (api.getBitWidth()==80)
2925 return initFromF80LongDoubleAPInt(api);
2926 else if (api.getBitWidth()==128 && !isIEEE)
2927 return initFromPPCDoubleDoubleAPInt(api);
2932 APFloat::APFloat(const APInt& api, bool isIEEE)
2934 initFromAPInt(api, isIEEE);
2937 APFloat::APFloat(float f)
2939 APInt api = APInt(32, 0);
2940 initFromAPInt(api.floatToBits(f));
2943 APFloat::APFloat(double d)
2945 APInt api = APInt(64, 0);
2946 initFromAPInt(api.doubleToBits(d));