1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/Support/MathExtras.h"
// Folds two category values into a single integer so that a pair of
// categories can serve as one switch case label, as done in the
// Specials routines of this file.
// NOTE(review): the factor 4 assumes every category value is below 4;
// the enum is not visible in this view - confirm.
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
// Declares an extern array whose size is 1 when COND holds and -1
// otherwise, so a false condition fails to compile.
26 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
27 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
31 /* Represents floating point arithmetic semantics. */
// NOTE(review): the struct header line is not visible in this view.
// The constants defined after these fields initialize them
// positionally, in the order maxExponent, minExponent, precision,
// arithmeticOK, so the declaration order must not change.
33 /* The largest E such that 2^E is representable; this matches the
34 definition of IEEE 754. */
35 exponent_t maxExponent;
37 /* The smallest E such that 2^E is a normalized number; this
38 matches the definition of IEEE 754. */
39 exponent_t minExponent;
41 /* Number of bits in the significand. This includes the integer
43 unsigned int precision;
45 /* True if arithmetic is supported. */
// Tested by assertArithmeticOK.
46 unsigned int arithmeticOK;
// Each initializer lists maxExponent, minExponent, precision and
// arithmeticOK, in that order.
49 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
50 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
51 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
52 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
// NOTE(review): Bogus has all-zero parameters yet sets arithmeticOK;
// presumably a placeholder that is never computed with - confirm.
53 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
55 // The PowerPC format consists of two doubles. It does not map cleanly
56 // onto the usual format above. For now only storage of constants of
57 // this type is supported, no arithmetic.
58 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
60 /* A tight upper bound on number of parts required to hold the value
63 pow(5, power) is power * 815 / (351 * integerPartWidth) + 1.
65 However, whilst the result may require only this many parts,
66 because we are multiplying two values to get it, the
67 multiplication may require an extra part with the excess part
68 being zero (consider the trivial case of 1 * 1, tcFullMultiply
69 requires two parts to hold the single-part result). So we add an
70 extra one to guarantee enough space whilst multiplying. */
// File-scope limits that size the power-of-five work buffers. The two
// values below equal the maxExponent and precision of IEEEquad, the
// largest among the semantics constants defined in this file.
71 const unsigned int maxExponent = 16383;
72 const unsigned int maxPrecision = 113;
73 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
// The leading 2 is the +1 of the bound quoted above plus the one spare
// part needed while multiplying.
74 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
75 / (351 * integerPartWidth));
78 /* Put a bunch of private, handy routines in an anonymous namespace. */
// Returns the number of integerParts needed to hold BITS bits, that is
// BITS divided by integerPartWidth and rounded up.
81 static inline unsigned int
82 partCountForBits(unsigned int bits)
84 return ((bits) + integerPartWidth - 1) / integerPartWidth;
87 /* Returns 0U-9U. Return values >= 10U are not digits. */
// NOTE(review): the body is not visible in this view. Callers in this
// file treat any result of 10U or more as meaning C is not a digit.
88 static inline unsigned int
89 decDigitValue(unsigned int c)
// NOTE(review): the body is not visible in this view; presumably maps
// the character C to its hexadecimal digit value - confirm.
// trailingHexadecimalFraction calls it on each character it scans.
95 hexDigitValue(unsigned int c)
// Asserts that SEMANTICS has its arithmeticOK flag set. The arithmetic
// entry points and several constructors in this file call it first.
115 assertArithmeticOK(const llvm::fltSemantics &semantics) {
116 assert(semantics.arithmeticOK
117 && "Compile-time arithmetic does not support these semantics");
120 /* Return the value of a decimal exponent of the form [+-]ddddddd.
123 If the exponent overflows, returns a large exponent with the appropriate sign. */
// P points at the optional sign of the exponent; interpretDecimal
// passes the character that follows the exponent letter.
126 readExponent(const char *p)
129 unsigned int absExponent;
// Magnitude at which accumulation stops and the result saturates.
130 const unsigned int overlargeExponent = 24000; /* FIXME. */
132 isNegative = (*p == '-');
133 if (*p == '-' || *p == '+')
// At least one digit is required; its value seeds the accumulator.
136 absExponent = decDigitValue(*p++);
137 assert (absExponent < 10U);
142 value = decDigitValue(*p);
// value holds the new digit; add ten times the digits read so far.
147 value += absExponent * 10;
148 if (absExponent >= overlargeExponent) {
149 absExponent = overlargeExponent;
// The magnitude is returned either negated or as is.
// NOTE(review): the test choosing between the two returns is not
// visible here; presumably isNegative - confirm.
156 return -(int) absExponent;
158 return (int) absExponent;
161 /* This is ugly and needs cleaning up, but I don't immediately see
162 how whilst remaining safe. */
// Parses the decimal exponent text at P, negates it, and adds
// EXPONENTADJUSTMENT.
// NOTE(review): the statements that set and test the overflow flag, and
// the test guarding the negation, are not visible in this view. From
// the range checks below, an out-of-range value presumably ends up
// pinned to -65536 or 65535 according to the parsed sign - confirm.
164 totalExponent(const char *p, int exponentAdjustment)
166 int unsignedExponent;
167 bool negative, overflow;
170 /* Move past the exponent letter and sign to the digits. */
172 negative = *p == '-';
173 if(*p == '-' || *p == '+')
176 unsignedExponent = 0;
181 value = decDigitValue(*p);
// Accumulate the digits, watching for a magnitude beyond 65535.
186 unsignedExponent = unsignedExponent * 10 + value;
187 if(unsignedExponent > 65535)
// The adjustment itself must also lie in range before it is added.
191 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
195 exponent = unsignedExponent;
197 exponent = -exponent;
198 exponent += exponentAdjustment;
199 if(exponent > 65535 || exponent < -65536)
204 exponent = negative ? -65536: 65535;
// NOTE(review): the body is not visible in this view. interpretDecimal
// uses the return value as the new scan position and passes the address
// of its own dot pointer as DOT.
210 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
225 /* Given a normal decimal floating point number of the form dddd.dddd[eE][+-]ddd,
229 where the decimal point and exponent are optional, fill out the
230 structure D. Exponent is appropriate if the significand is
231 treated as an integer, and normalizedExponent if the significand
232 is taken to have the decimal point after a single leading non-zero digit.
235 If the value is zero, D->firstSigDigit points to a non-digit, and
236 the return exponent is zero. */
// Set by interpretDecimal to the position reached after skipping
// leading zeroes and any dot.
239 const char *firstSigDigit;
// NOTE(review): no assignment to this field is visible in this view;
// presumably the last significant digit - confirm.
240 const char *lastSigDigit;
// NOTE(review): interpretDecimal also writes a field named exponent,
// whose declaration is not visible in this view.
242 int normalizedExponent;
// Scans the decimal literal at P and records in D where its
// significant digits begin, together with the two exponent values.
246 interpretDecimal(const char *p, decimalInfo *D)
250 p = skipLeadingZeroesAndAnyDot (p, &dot);
252 D->firstSigDigit = p;
254 D->normalizedExponent = 0;
// A character that is not a decimal digit has a value of 10U or more.
261 if (decDigitValue(*p) >= 10U)
266 /* If number is all zeroes accept any exponent. */
267 if (p != D->firstSigDigit) {
268 if (*p == 'e' || *p == 'E')
269 D->exponent = readExponent(p + 1);
271 /* Implied decimal point? */
275 /* Drop insignificant trailing zeroes. */
282 /* Adjust the exponents for any decimal point. */
// The boolean terms subtract one when a dot lies in the measured span;
// presumably because the dot character itself occupies one position.
283 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
284 D->normalizedExponent = (D->exponent +
285 static_cast<exponent_t>((p - D->firstSigDigit)
286 - (dot > D->firstSigDigit && dot < p)));
292 /* Return the trailing fraction of a hexadecimal number.
293 DIGITVALUE is the first hex digit of the fraction, P points to the next digit. */
// Classifies the discarded hexadecimal digits as a lostFraction. A
// first digit of 8 is the halfway point, so only 0 and 8 require
// looking at later digits.
296 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
298 unsigned int hexDigit;
300 /* If the first trailing digit isn't 0 or 8 we can work out the
301 fraction immediately. */
303 return lfMoreThanHalf;
304 else if(digitValue < 8 && digitValue > 0)
305 return lfLessThanHalf;
307 /* Otherwise we need to find the first non-zero digit. */
311 hexDigit = hexDigitValue(*p);
313 /* If we ran off the end it is exactly zero or one-half, otherwise
316 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
318 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
321 /* Return the fraction lost were a bignum truncated losing the least
322 significant BITS bits. */
// Classifies the BITS least significant bits of the PARTCOUNT-part
// number PARTS as a lostFraction, using the position of the lowest set
// bit and the value of bit BITS - 1.
324 lostFractionThroughTruncation(const integerPart *parts,
325 unsigned int partCount,
330 lsb = APInt::tcLSB(parts, partCount);
332 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
334 return lfExactlyZero;
336 return lfExactlyHalf;
// Bit BITS - 1 is only examined when it lies inside the number.
337 if(bits <= partCount * integerPartWidth
338 && APInt::tcExtractBit(parts, bits - 1))
339 return lfMoreThanHalf;
341 return lfLessThanHalf;
344 /* Shift DST right BITS bits noting lost fraction. */
// Records what the low BITS bits of the PARTS-part number DST amount to
// before they are discarded, then shifts DST right in place and returns
// that lost fraction.
346 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
348 lostFraction lost_fraction;
// Must be computed before the shift destroys the low bits.
350 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
352 APInt::tcShiftRight(dst, parts, bits);
354 return lost_fraction;
357 /* Combine the effect of two lost fractions. */
// Refines MORESIGNIFICANT given that a further, less significant
// fraction was also lost: a nonzero LESSSIGNIFICANT turns exactly-zero
// into less-than-half and exactly-half into more-than-half. The other
// two values are returned unchanged.
359 combineLostFractions(lostFraction moreSignificant,
360 lostFraction lessSignificant)
362 if(lessSignificant != lfExactlyZero) {
363 if(moreSignificant == lfExactlyZero)
364 moreSignificant = lfLessThanHalf;
365 else if(moreSignificant == lfExactlyHalf)
366 moreSignificant = lfMoreThanHalf;
369 return moreSignificant;
372 /* The error from the true value, in half-ulps, on multiplying two
373 floating point numbers, which differ from the value they
374 approximate by at most HUERR1 and HUERR2 half-ulps, is strictly less
375 than the returned value.
377 See "How to Read Floating Point Numbers Accurately" by William D. Clinger. */
// INEXACTMULTIPLY takes part in the arithmetic below as 0 or 1.
380 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
// NOTE(review): precondition on the input errors; its derivation is
// not shown in this view.
382 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
// Both inputs exact: only the multiplication itself can contribute.
384 if (HUerr1 + HUerr2 == 0)
385 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
387 return inexactMultiply + 2 * (HUerr1 + HUerr2);
390 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
391 when the least significant BITS are truncated. BITS cannot be zero. */
// Examines the low bits of PARTS that would be truncated and measures
// how far they lie from the rounding boundary.
// NOTE(review): several statements and conditions of the body are not
// visible in this view.
394 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
396 unsigned int count, partBits;
397 integerPart part, boundary;
// count selects the part examined; partBits is how many of its low
// bits are of interest.
402 count = bits / integerPartWidth;
403 partBits = bits % integerPartWidth + 1;
// Keep only the low partBits bits of that part.
405 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
408 boundary = (integerPart) 1 << (partBits - 1);
// Distance to the boundary, whichever side of it part lies on.
413 if (part - boundary <= boundary - part)
414 return part - boundary;
416 return boundary - part;
// An all-ones result stands for a distance too large to matter.
419 if (part == boundary) {
422 return ~(integerPart) 0; /* A lot. */
425 } else if (part == boundary - 1) {
428 return ~(integerPart) 0; /* A lot. */
433 return ~(integerPart) 0; /* A lot. */
436 /* Place pow(5, power) in DST, and return the number of parts used.
437 DST must be at least one part larger than size of the answer. */
// The low three bits of POWER select a starting value from
// firstEightPowers; the loop then walks the remaining bits of POWER,
// squaring a cached power of five at each step and multiplying it into
// the running result.
// NOTE(review): the test that decides when to multiply is not visible
// in this view; presumably the current low bit of POWER - confirm.
439 powerOf5(integerPart *dst, unsigned int power)
441 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
// Function-local static caches; the first cached entry is 78125 * 5.
// NOTE(review): these statics are written with no locking visible in
// this view - confirm callers are single threaded.
443 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
444 static unsigned int partsCount[16] = { 1 };
446 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
449 assert(power <= maxExponent);
454 *p1 = firstEightPowers[power & 7];
460 for (unsigned int n = 0; power; power >>= 1, n++) {
465 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
467 pc = partsCount[n - 1];
// Square the previous cached power to obtain the next one.
468 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
470 if (pow5[pc - 1] == 0)
// Multiply the running result by the cached power into the other
// buffer, then swap the roles of the two buffers below.
478 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
480 if (p2[result - 1] == 0)
483 /* Now result is in p1 with partsCount parts and p2 is scratch
485 tmp = p1, p1 = p2, p2 = tmp;
492 APInt::tcAssign(dst, p1, result);
497 /* Zero at the end to avoid modular arithmetic when adding one; used
498 when rounding up during hexadecimal output. */
499 static const char hexDigitsLower[] = "0123456789abcdef0";
500 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
// Lower- and upper-case spellings of the special values.
// NOTE(review): their users are outside this view; presumably the
// conversion to hexadecimal strings - confirm.
501 static const char infinityL[] = "infinity";
502 static const char infinityU[] = "INFINITY";
503 static const char NaNL[] = "nan";
504 static const char NaNU[] = "NAN";
506 /* Write out an integerPart in hexadecimal, starting with the most
507 significant nibble. Write out exactly COUNT hexdigits, return COUNT. */
// DST receives characters taken from HEXDIGITCHARS, which selects the
// digit alphabet, for example hexDigitsLower or hexDigitsUpper.
510 partAsHex (char *dst, integerPart part, unsigned int count,
511 const char *hexDigitChars)
513 unsigned int result = count;
// COUNT may not exceed the number of nibbles in one integerPart.
515 assert (count != 0 && count <= integerPartWidth / 4);
// Discard the low nibbles that are not wanted.
517 part >>= (integerPartWidth - 4 * count);
// Emit the current lowest nibble.
// NOTE(review): the loop around this statement is not visible here.
519 dst[count] = hexDigitChars[part & 0xf];
526 /* Write out an unsigned decimal integer. */
// NOTE(review): the body is not visible in this view.
// writeSignedDecimal assigns the return value back to its own dst
// pointer.
528 writeUnsignedDecimal (char *dst, unsigned int n)
544 /* Write out a signed decimal integer. */
// Delegates the digits to writeUnsignedDecimal.
// NOTE(review): the sign test and the emission of the minus character
// are not visible in this view.
546 writeSignedDecimal (char *dst, int value)
// VALUE is converted to unsigned before being negated, so the negation
// is carried out in unsigned arithmetic.
550 dst = writeUnsignedDecimal(dst, -(unsigned) value);
552 dst = writeUnsignedDecimal(dst, value);
// Adopts OURSEMANTICS and, on the path shown, allocates an array of
// COUNT parts for the significand.
// NOTE(review): the computation of count and the condition guarding the
// allocation are not visible in this view; significandParts can also
// return the address of a single inline part - confirm when the array
// form is used.
560 APFloat::initialize(const fltSemantics *ourSemantics)
564 semantics = ourSemantics;
567 significand.parts = new integerPart[count];
// Releases the significand array with delete [].
// NOTE(review): the condition guarding the delete is not visible in
// this view; it must match the one used when allocating in initialize.
571 APFloat::freeSignificand()
574 delete [] significand.parts;
// Copies the value of RHS into this object; both must already share the
// same semantics.
// NOTE(review): not every statement of the body is visible in this view.
578 APFloat::assign(const APFloat &rhs)
580 assert(semantics == rhs.semantics);
583 category = rhs.category;
584 exponent = rhs.exponent;
586 exponent2 = rhs.exponent2;
// Only normal numbers and NaNs have their significand copied.
587 if(category == fcNormal || category == fcNaN)
588 copySignificand(rhs);
// Copies significand parts from RHS into this object with tcAssign.
// Requires this object to be normal or NaN and RHS to have at least as
// many parts as this object.
592 APFloat::copySignificand(const APFloat &rhs)
594 assert(category == fcNormal || category == fcNaN);
595 assert(rhs.partCount() >= partCount());
597 APInt::tcAssign(significandParts(), rhs.significandParts(),
601 /* Make this number a NaN, with an arbitrary but deterministic value
602 for the significand. If double or longer, this is a signalling NaN,
603 which may not be ideal. */
// NOTE(review): the statement that sets the category is not visible in
// this view.
605 APFloat::makeNaN(void)
// The fixed bit pattern makes the NaN payload deterministic.
608 APInt::tcSet(significandParts(), ~0U, partCount());
// Assignment operator.
// NOTE(review): only the semantics-mismatch path is visible in this
// view; the value copy and return statement are not shown.
612 APFloat::operator=(const APFloat &rhs)
// Differing semantics: set this object up again for the semantics of
// RHS before taking its value.
615 if(semantics != rhs.semantics) {
617 initialize(rhs.semantics);
// Compares two values field by field: semantics and category first,
// then exponents, then the significand part by part.
// NOTE(review): the return statements are not visible in this view.
626 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
629 if (semantics != rhs.semantics ||
630 category != rhs.category ||
633 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
// Zeroes and infinities are handled before any exponent or significand
// comparison.
636 if (category==fcZero || category==fcInfinity)
638 else if (category==fcNormal && exponent!=rhs.exponent)
// The second exponent is only compared for PPCDoubleDouble.
640 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
641 exponent2!=rhs.exponent2)
645 const integerPart* p=significandParts();
646 const integerPart* q=rhs.significandParts();
647 for (; i>0; i--, p++, q++) {
// Builds a value from the unsigned integer VALUE: it is stored in the
// lowest significand part with exponent precision - 1, and normalize
// then brings the number into canonical form.
655 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
657 assertArithmeticOK(ourSemantics);
658 initialize(&ourSemantics);
661 exponent = ourSemantics.precision - 1;
662 significandParts()[0] = value;
663 normalize(rmNearestTiesToEven, lfExactlyZero);
// Builds a value of the given category.
// NOTE(review): the statements run for fcNormal and fcNaN, and the use
// of NEGATIVE, are not visible in this view.
666 APFloat::APFloat(const fltSemantics &ourSemantics,
667 fltCategory ourCategory, bool negative)
669 assertArithmeticOK(ourSemantics);
670 initialize(&ourSemantics);
671 category = ourCategory;
673 if(category == fcNormal)
675 else if (ourCategory == fcNaN)
// Builds a value by parsing TEXT with convertFromString, rounding to
// nearest with ties to even. The status returned by the conversion is
// not examined here.
679 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
681 assertArithmeticOK(ourSemantics);
682 initialize(&ourSemantics);
683 convertFromString(text, rmNearestTiesToEven);
// Copy constructor: sets up storage for the semantics of RHS.
// NOTE(review): the statement that copies the value is not visible in
// this view.
686 APFloat::APFloat(const APFloat &rhs)
688 initialize(rhs.semantics);
697 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
// Adds the APInt returned by bitcastToAPInt to ID.
698 void APFloat::Profile(FoldingSetNodeID& ID) const {
699 ID.Add(bitcastToAPInt());
// Number of integerParts in the significand, sized for one bit more
// than the precision.
// NOTE(review): the reason for the extra bit is not stated here;
// presumably headroom for a carry or guard bit - confirm.
703 APFloat::partCount() const
705 return partCountForBits(semantics->precision + 1);
// Returns the precision field of SEMANTICS.
709 APFloat::semanticsPrecision(const fltSemantics &semantics)
711 return semantics.precision;
// Const overload: forwards to the non-const overload by casting away
// the constness of this.
715 APFloat::significandParts() const
717 return const_cast<APFloat *>(this)->significandParts();
// Returns a pointer to the significand storage; only valid for normal
// numbers and NaNs.
// NOTE(review): the condition choosing between the two returns is not
// visible in this view; presumably the part count - confirm.
721 APFloat::significandParts()
723 assert(category == fcNormal || category == fcNaN);
// Array form of the storage.
726 return significand.parts;
// Single inline part.
728 return &significand.part;
// Calls tcSet with the value 0 over all partCount() parts of the
// significand.
732 APFloat::zeroSignificand()
735 APInt::tcSet(significandParts(), 0, partCount());
738 /* Increment an fcNormal floating point number's significand. */
// Adds one to the significand with tcIncrement and captures the carry.
// NOTE(review): the statement that consumes the carry is not visible in
// this view.
740 APFloat::incrementSignificand()
744 carry = APInt::tcIncrement(significandParts(), partCount());
746 /* Our callers should never cause us to overflow. */
750 /* Add the significand of the RHS. Returns the carry flag. */
// Adds the significand of RHS into this significand in place. Both
// operands must share semantics and exponent.
752 APFloat::addSignificand(const APFloat &rhs)
756 parts = significandParts();
758 assert(semantics == rhs.semantics);
759 assert(exponent == rhs.exponent);
// The carry-in is zero.
761 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
764 /* Subtract the significand of the RHS with a borrow flag. Returns the borrow flag. */
// Subtracts the significand of RHS from this significand in place,
// with BORROW as the incoming borrow. Both operands must share
// semantics and exponent.
767 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
771 parts = significandParts();
773 assert(semantics == rhs.semantics);
774 assert(exponent == rhs.exponent);
776 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
780 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
781 on to the full-precision result of the multiplication. Returns the lost fraction. */
// Forms the full double-width product of the two significands, adds
// ADDEND at that width on the path that uses it, then cuts the result
// back to the working precision and reports what was lost.
// NOTE(review): some statements, including the test on ADDEND, are not
// visible in this view.
784 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
786 unsigned int omsb; // One, not zero, based MSB.
787 unsigned int partsCount, newPartsCount, precision;
788 integerPart *lhsSignificand;
789 integerPart scratch[4];
790 integerPart *fullSignificand;
791 lostFraction lost_fraction;
794 assert(semantics == rhs.semantics);
796 precision = semantics->precision;
797 newPartsCount = partCountForBits(precision * 2);
// The product lives in the local scratch array unless it needs more
// than four parts; the matching delete is at the end of the function.
799 if(newPartsCount > 4)
800 fullSignificand = new integerPart[newPartsCount];
802 fullSignificand = scratch;
804 lhsSignificand = significandParts();
805 partsCount = partCount();
807 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
808 rhs.significandParts(), partsCount, partsCount);
810 lost_fraction = lfExactlyZero;
811 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
812 exponent += rhs.exponent;
// Addend path: this object temporarily takes the wide product as its
// significand, under widened semantics, so the ordinary add or subtract
// routine can be reused. The saved state is restored afterwards.
815 Significand savedSignificand = significand;
816 const fltSemantics *savedSemantics = semantics;
817 fltSemantics extendedSemantics;
819 unsigned int extendedPrecision;
821 /* Normalize our MSB. */
822 extendedPrecision = precision + precision - 1;
823 if(omsb != extendedPrecision)
825 APInt::tcShiftLeft(fullSignificand, newPartsCount,
826 extendedPrecision - omsb);
827 exponent -= extendedPrecision - omsb;
830 /* Create new semantics. */
831 extendedSemantics = *semantics;
832 extendedSemantics.precision = extendedPrecision;
834 if(newPartsCount == 1)
835 significand.part = fullSignificand[0];
837 significand.parts = fullSignificand;
838 semantics = &extendedSemantics;
// The addend is converted to the widened semantics; the assert expects
// that conversion to be exact.
840 APFloat extendedAddend(*addend);
841 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
842 assert(status == opOK);
843 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
845 /* Restore our state. */
846 if(newPartsCount == 1)
847 fullSignificand[0] = significand.part;
848 significand = savedSignificand;
849 semantics = savedSemantics;
851 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
854 exponent -= (precision - 1);
// Shift out the bits beyond the working precision and fold what they
// were worth into the lost fraction.
856 if(omsb > precision) {
857 unsigned int bits, significantParts;
860 bits = omsb - precision;
861 significantParts = partCountForBits(omsb);
862 lf = shiftRight(fullSignificand, significantParts, bits);
863 lost_fraction = combineLostFractions(lf, lost_fraction);
867 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
869 if(newPartsCount > 4)
870 delete [] fullSignificand;
872 return lost_fraction;
875 /* Divide our significand by the significand of RHS, leaving the quotient in our significand. Returns the lost fraction. */
// Bit-by-bit long division of this significand by that of RHS. The
// quotient is built in this significand, one bit per loop iteration.
877 APFloat::divideSignificand(const APFloat &rhs)
879 unsigned int bit, i, partsCount;
880 const integerPart *rhsSignificand;
881 integerPart *lhsSignificand, *dividend, *divisor;
882 integerPart scratch[4];
883 lostFraction lost_fraction;
885 assert(semantics == rhs.semantics);
887 lhsSignificand = significandParts();
888 rhsSignificand = rhs.significandParts();
889 partsCount = partCount();
// One buffer holds both working copies: dividend first, divisor after.
// NOTE(review): the condition guarding this allocation and the matching
// delete [] are not visible in this view - confirm the buffer is
// released on every path.
892 dividend = new integerPart[partsCount * 2];
896 divisor = dividend + partsCount;
898 /* Copy the dividend and divisor as they will be modified in-place. */
899 for(i = 0; i < partsCount; i++) {
900 dividend[i] = lhsSignificand[i];
901 divisor[i] = rhsSignificand[i];
// The quotient starts from zero in our own significand.
902 lhsSignificand[i] = 0;
905 exponent -= rhs.exponent;
907 unsigned int precision = semantics->precision;
909 /* Normalize the divisor. */
910 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
913 APInt::tcShiftLeft(divisor, partsCount, bit);
916 /* Normalize the dividend. */
917 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
920 APInt::tcShiftLeft(dividend, partsCount, bit);
923 /* Ensure the dividend >= divisor initially for the loop below.
924 Incidentally, this means that the division loop below is
925 guaranteed to set the integer bit to one. */
926 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
928 APInt::tcShiftLeft(dividend, partsCount, 1);
929 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
// One quotient bit per iteration, most significant first: subtract the
// divisor when it fits and set the bit, then double the remainder.
933 for(bit = precision; bit; bit -= 1) {
934 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
935 APInt::tcSubtract(dividend, divisor, 0, partsCount);
936 APInt::tcSetBit(lhsSignificand, bit - 1);
939 APInt::tcShiftLeft(dividend, partsCount, 1);
942 /* Figure out the lost fraction. */
// At this point dividend holds twice the final remainder, so comparing
// it with the divisor tells which side of one half the remainder is on.
943 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
946 lost_fraction = lfMoreThanHalf;
948 lost_fraction = lfExactlyHalf;
949 else if(APInt::tcIsZero(dividend, partsCount))
950 lost_fraction = lfExactlyZero;
952 lost_fraction = lfLessThanHalf;
957 return lost_fraction;
// Returns the result of tcMSB over the whole significand.
961 APFloat::significandMSB() const
963 return APInt::tcMSB(significandParts(), partCount());
// Returns the result of tcLSB over the whole significand.
967 APFloat::significandLSB() const
969 return APInt::tcLSB(significandParts(), partCount());
972 /* Note that a zero result is NOT normalized to fcZero. */
// Shifts the significand right by BITS bits and returns the fraction
// lost.
// NOTE(review): the statement that adjusts the exponent is not visible
// in this view.
974 APFloat::shiftSignificandRight(unsigned int bits)
976 /* Our exponent should not overflow. */
977 assert((exponent_t) (exponent + bits) >= exponent);
981 return shiftRight(significandParts(), partCount(), bits);
984 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
// BITS must be smaller than the precision.
// NOTE(review): the statement that adjusts the exponent is not visible
// in this view.
986 APFloat::shiftSignificandLeft(unsigned int bits)
988 assert(bits < semantics->precision);
991 unsigned int partsCount = partCount();
993 APInt::tcShiftLeft(significandParts(), partsCount, bits);
// The significand is expected to be nonzero after the shift.
996 assert(!APInt::tcIsZero(significandParts(), partsCount));
// Compares the magnitudes of two normal numbers with equal semantics:
// exponents first, then the significands when the exponents are equal.
1001 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1005 assert(semantics == rhs.semantics);
1006 assert(category == fcNormal);
1007 assert(rhs.category == fcNormal);
1009 compare = exponent - rhs.exponent;
1011 /* If exponents are equal, do an unsigned bignum comparison of the
1014 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1018 return cmpGreaterThan;
1019 else if(compare < 0)
1025 /* Handle overflow. Sign is preserved. We either become infinity or
1026 the largest finite number. */
// ROUNDING_MODE and the sign decide which of the two outcomes applies.
1028 APFloat::handleOverflow(roundingMode rounding_mode)
// Both round-to-nearest modes, and the directed modes that point away
// from zero for the current sign, give infinity.
1031 if(rounding_mode == rmNearestTiesToEven
1032 || rounding_mode == rmNearestTiesToAway
1033 || (rounding_mode == rmTowardPositive && !sign)
1034 || (rounding_mode == rmTowardNegative && sign))
1036 category = fcInfinity;
1037 return (opStatus) (opOverflow | opInexact);
1040 /* Otherwise we become the largest finite number. */
1041 category = fcNormal;
1042 exponent = semantics->maxExponent;
// Set the low precision bits of the significand.
1043 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1044 semantics->precision);
1049 /* Returns TRUE if, when truncating the current number, with BIT the
1050 new LSB, with the given lost fraction and rounding mode, the result
1051 would need to be rounded away from zero (i.e., by increasing the
1052 significand). This routine must work for fcZero of both signs, and
1053 fcNormal numbers. */
// Decides, per rounding mode, whether the magnitude must be bumped up.
// NOTE(review): some case labels and returns are not visible in this
// view.
1055 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1056 lostFraction lost_fraction,
1057 unsigned int bit) const
1059 /* NaNs and infinities should not have lost fractions. */
1060 assert(category == fcNormal || category == fcZero);
1062 /* Current callers never pass this so we don't handle it. */
1063 assert(lost_fraction != lfExactlyZero);
1065 switch(rounding_mode) {
// Ties away: a half or more rounds up.
1069 case rmNearestTiesToAway:
1070 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1072 case rmNearestTiesToEven:
1073 if(lost_fraction == lfMoreThanHalf)
1076 /* Our zeroes don't have a significand to test. */
// Exact tie: the answer is the value of bit BIT of the significand.
1077 if(lost_fraction == lfExactlyHalf && category != fcZero)
1078 return APInt::tcExtractBit(significandParts(), bit);
// Directed modes depend only on the sign.
1085 case rmTowardPositive:
1086 return sign == false;
1088 case rmTowardNegative:
1089 return sign == true;
// Brings an fcNormal value into canonical form and rounds it according
// to ROUNDING_MODE, given the fraction already lost by the caller.
// Returns status flags. Values of any other category are not processed.
// NOTE(review): several statements and returns are not visible in this
// view.
1094 APFloat::normalize(roundingMode rounding_mode,
1095 lostFraction lost_fraction)
1097 unsigned int omsb; /* One, not zero, based MSB. */
1100 if(category != fcNormal)
1103 /* Before rounding normalize the exponent of fcNormal numbers. */
1104 omsb = significandMSB() + 1;
1107 /* OMSB is numbered from 1. We want to place it in the integer
1108 bit numbered PRECISION if possible, with a compensating change in
1110 exponentChange = omsb - semantics->precision;
1112 /* If the resulting exponent is too high, overflow according to
1113 the rounding mode. */
1114 if(exponent + exponentChange > semantics->maxExponent)
1115 return handleOverflow(rounding_mode);
1117 /* Subnormal numbers have exponent minExponent, and their MSB
1118 is forced based on that. */
1119 if(exponent + exponentChange < semantics->minExponent)
1120 exponentChange = semantics->minExponent - exponent;
1122 /* Shifting left is easy as we don't lose precision. */
1123 if(exponentChange < 0) {
1124 assert(lost_fraction == lfExactlyZero);
1126 shiftSignificandLeft(-exponentChange);
1131 if(exponentChange > 0) {
1134 /* Shift right and capture any new lost fraction. */
1135 lf = shiftSignificandRight(exponentChange);
1137 lost_fraction = combineLostFractions(lf, lost_fraction);
1139 /* Keep OMSB up-to-date. */
1140 if(omsb > (unsigned) exponentChange)
1141 omsb -= exponentChange;
1147 /* Now round the number according to rounding_mode given the lost
1150 /* As specified in IEEE 754, since we do not trap we do not report
1151 underflow for exact results. */
1152 if(lost_fraction == lfExactlyZero) {
1153 /* Canonicalize zeroes. */
1160 /* Increment the significand if we're rounding away from zero. */
1161 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1163 exponent = semantics->minExponent;
1165 incrementSignificand();
1166 omsb = significandMSB() + 1;
1168 /* Did the significand increment overflow? */
1169 if(omsb == (unsigned) semantics->precision + 1) {
1170 /* Renormalize by incrementing the exponent and shifting our
1171 significand right one. However if we already have the
1172 maximum exponent we overflow to infinity. */
1173 if(exponent == semantics->maxExponent) {
1174 category = fcInfinity;
1176 return (opStatus) (opOverflow | opInexact);
1179 shiftSignificandRight(1);
1185 /* The normal case - we were and are not denormal, and any
1186 significand increment above didn't overflow. */
1187 if(omsb == semantics->precision)
1190 /* We have a non-zero denormal. */
1191 assert(omsb < semantics->precision);
1193 /* Canonicalize zeroes. */
1197 /* The fcZero case is a denormal that underflowed to zero. */
1198 return (opStatus) (opUnderflow | opInexact);
// Handles every category pairing for addition and subtraction except
// normal with normal. Case labels listed together share one action.
// NOTE(review): the return statements are not visible in this view.
// addOrSubtract tests the result against opDivByZero to detect the
// pairing that was not a simple case.
1202 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1204 switch(convolve(category, rhs.category)) {
1208 case convolve(fcNaN, fcZero):
1209 case convolve(fcNaN, fcNormal):
1210 case convolve(fcNaN, fcInfinity):
1211 case convolve(fcNaN, fcNaN):
1212 case convolve(fcNormal, fcZero):
1213 case convolve(fcInfinity, fcNormal):
1214 case convolve(fcInfinity, fcZero):
// RHS is a NaN and this value is not: take the significand of RHS.
1217 case convolve(fcZero, fcNaN):
1218 case convolve(fcNormal, fcNaN):
1219 case convolve(fcInfinity, fcNaN):
1221 copySignificand(rhs);
// A finite value combined with an infinite RHS: the result is an
// infinity whose sign is that of RHS, flipped when subtracting.
1224 case convolve(fcNormal, fcInfinity):
1225 case convolve(fcZero, fcInfinity):
1226 category = fcInfinity;
1227 sign = rhs.sign ^ subtract;
1230 case convolve(fcZero, fcNormal):
1232 sign = rhs.sign ^ subtract;
1235 case convolve(fcZero, fcZero):
1236 /* Sign depends on rounding mode; handled by caller. */
1239 case convolve(fcInfinity, fcInfinity):
1240 /* Differently signed infinities can only be validly
1242 if(((sign ^ rhs.sign)!=0) != subtract) {
1249 case convolve(fcNormal, fcNormal):
1254 /* Add or subtract two normal numbers. */
// Aligns the two significands by exponent, then adds or subtracts
// them, and returns the fraction lost during alignment.
// NOTE(review): several conditions and statements are not visible in
// this view.
1256 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1259 lostFraction lost_fraction;
1262 /* Determine if the operation on the absolute values is effectively
1263 an addition or subtraction. */
1264 subtract ^= (sign ^ rhs.sign) ? true : false;
1266 /* Are we bigger exponent-wise than the RHS? */
1267 bits = exponent - rhs.exponent;
1269 /* Subtraction is more subtle than one might naively expect. */
// A copy of RHS is used because its significand may be shifted.
1271 APFloat temp_rhs(rhs);
1275 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1276 lost_fraction = lfExactlyZero;
// Unequal exponents: one operand is shifted right by one bit less than
// the gap and the other is shifted left by one bit.
1277 } else if (bits > 0) {
1278 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1279 shiftSignificandLeft(1);
1282 lost_fraction = shiftSignificandRight(-bits - 1);
1283 temp_rhs.shiftSignificandLeft(1);
// A nonzero lost fraction is passed in as the borrow.
1288 carry = temp_rhs.subtractSignificand
1289 (*this, lost_fraction != lfExactlyZero);
1290 copySignificand(temp_rhs);
1293 carry = subtractSignificand
1294 (temp_rhs, lost_fraction != lfExactlyZero);
1297 /* Invert the lost fraction - it was on the RHS and
1299 if(lost_fraction == lfLessThanHalf)
1300 lost_fraction = lfMoreThanHalf;
1301 else if(lost_fraction == lfMoreThanHalf)
1302 lost_fraction = lfLessThanHalf;
1304 /* The code above is intended to ensure that no borrow is
// Addition: whichever operand has the smaller exponent is shifted
// right by the full gap before the significands are added.
1309 APFloat temp_rhs(rhs);
1311 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1312 carry = addSignificand(temp_rhs);
1314 lost_fraction = shiftSignificandRight(-bits);
1315 carry = addSignificand(rhs);
1318 /* We have a guard bit; generating a carry cannot happen. */
1322 return lost_fraction;
// Handles the category pairings for multiplication. Case labels listed
// together share one action.
// NOTE(review): the return statements and several assignments are not
// visible in this view.
1326 APFloat::multiplySpecials(const APFloat &rhs)
1328 switch(convolve(category, rhs.category)) {
1332 case convolve(fcNaN, fcZero):
1333 case convolve(fcNaN, fcNormal):
1334 case convolve(fcNaN, fcInfinity):
1335 case convolve(fcNaN, fcNaN):
// RHS is a NaN and this value is not: take the significand of RHS.
1338 case convolve(fcZero, fcNaN):
1339 case convolve(fcNormal, fcNaN):
1340 case convolve(fcInfinity, fcNaN):
1342 copySignificand(rhs);
// At least one operand is an infinity and neither is a zero or a NaN.
1345 case convolve(fcNormal, fcInfinity):
1346 case convolve(fcInfinity, fcNormal):
1347 case convolve(fcInfinity, fcInfinity):
1348 category = fcInfinity;
1351 case convolve(fcZero, fcNormal):
1352 case convolve(fcNormal, fcZero):
1353 case convolve(fcZero, fcZero):
1357 case convolve(fcZero, fcInfinity):
1358 case convolve(fcInfinity, fcZero):
1362 case convolve(fcNormal, fcNormal):
// Handles the category pairings for division, with this value as the
// dividend and RHS as the divisor. Case labels listed together share
// one action.
// NOTE(review): the return statements and several assignments are not
// visible in this view.
1368 APFloat::divideSpecials(const APFloat &rhs)
1370 switch(convolve(category, rhs.category)) {
1374 case convolve(fcNaN, fcZero):
1375 case convolve(fcNaN, fcNormal):
1376 case convolve(fcNaN, fcInfinity):
1377 case convolve(fcNaN, fcNaN):
1378 case convolve(fcInfinity, fcZero):
1379 case convolve(fcInfinity, fcNormal):
1380 case convolve(fcZero, fcInfinity):
1381 case convolve(fcZero, fcNormal):
// RHS is a NaN and this value is not: take the significand of RHS.
1384 case convolve(fcZero, fcNaN):
1385 case convolve(fcNormal, fcNaN):
1386 case convolve(fcInfinity, fcNaN):
1388 copySignificand(rhs);
1391 case convolve(fcNormal, fcInfinity):
// A normal number divided by zero becomes an infinity.
1395 case convolve(fcNormal, fcZero):
1396 category = fcInfinity;
1399 case convolve(fcInfinity, fcInfinity):
1400 case convolve(fcZero, fcZero):
1404 case convolve(fcNormal, fcNormal):
// Handles the category pairings for the mod operation. Case labels
// listed together share one action.
// NOTE(review): the return statements and several assignments are not
// visible in this view.
1410 APFloat::modSpecials(const APFloat &rhs)
1412 switch(convolve(category, rhs.category)) {
1416 case convolve(fcNaN, fcZero):
1417 case convolve(fcNaN, fcNormal):
1418 case convolve(fcNaN, fcInfinity):
1419 case convolve(fcNaN, fcNaN):
1420 case convolve(fcZero, fcInfinity):
1421 case convolve(fcZero, fcNormal):
1422 case convolve(fcNormal, fcInfinity):
// RHS is a NaN and this value is not: take the significand of RHS.
1425 case convolve(fcZero, fcNaN):
1426 case convolve(fcNormal, fcNaN):
1427 case convolve(fcInfinity, fcNaN):
1429 copySignificand(rhs);
1432 case convolve(fcNormal, fcZero):
1433 case convolve(fcInfinity, fcZero):
1434 case convolve(fcInfinity, fcNormal):
1435 case convolve(fcInfinity, fcInfinity):
1436 case convolve(fcZero, fcZero):
1440 case convolve(fcNormal, fcNormal):
// NOTE(review): the statement of the body is not visible in this view;
// presumably it flips the sign member - confirm.
1447 APFloat::changeSign()
1449 /* Look mummy, this one's easy. */
// NOTE(review): the statement of the body is not visible in this view;
// presumably it resets the sign member - confirm.
1454 APFloat::clearSign()
1456 /* So is this one. */
// NOTE(review): the body is not visible in this view; presumably it
// takes the sign of RHS - confirm.
1461 APFloat::copySign(const APFloat &rhs)
1467 /* Normalized addition or subtraction. */
// Shared driver for add and subtract: special categories are tried
// first, and only the remaining pairing goes through significand
// arithmetic and normalization.
1469 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1474 assertArithmeticOK(*semantics);
1476 fs = addOrSubtractSpecials(rhs, subtract);
1478 /* This return code means it was not a simple case. */
1479 if(fs == opDivByZero) {
1480 lostFraction lost_fraction;
1482 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1483 fs = normalize(rounding_mode, lost_fraction);
1485 /* Can only be zero if we lost no fraction. */
1486 assert(category != fcZero || lost_fraction == lfExactlyZero);
1489 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1490 positive zero unless rounding to minus infinity, except that
1491 adding two like-signed zeroes gives that zero. */
1492 if(category == fcZero) {
// The sign is left alone only when both operands were zeroes that are
// effectively like-signed for this operation.
1493 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1494 sign = (rounding_mode == rmTowardNegative);
1500 /* Normalized addition. */
// Forwards to addOrSubtract with the subtract flag false.
1502 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1504 return addOrSubtract(rhs, rounding_mode, false);
1507 /* Normalized subtraction. */
// Forwards to addOrSubtract with the subtract flag true.
1509 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1511 return addOrSubtract(rhs, rounding_mode, true);
1514 /* Normalized multiply. */
// Multiplies this value by RHS in place and returns the status flags.
1516 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1520 assertArithmeticOK(*semantics);
1522 fs = multiplySpecials(rhs);
// Only a result that is still fcNormal needs significand arithmetic.
1524 if(category == fcNormal) {
1525 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1526 fs = normalize(rounding_mode, lost_fraction);
// Any lost fraction marks the result inexact.
1527 if(lost_fraction != lfExactlyZero)
1528 fs = (opStatus) (fs | opInexact);
1534 /* Normalized divide. */
/* Divides *this by RHS, rounding with ROUNDING_MODE.  The semantics must
   support arithmetic (asserted).  divideSpecials resolves the category
   first; if the value is fcNormal afterwards the significands are
   divided, the result is normalized, and opInexact is OR-ed into the
   status whenever a non-zero fraction was lost. */
1536 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1540 assertArithmeticOK(*semantics);
1542 fs = divideSpecials(rhs);
1544 if(category == fcNormal) {
1545 lostFraction lost_fraction = divideSignificand(rhs);
1546 fs = normalize(rounding_mode, lost_fraction);
1547 if(lost_fraction != lfExactlyZero)
1548 fs = (opStatus) (fs | opInexact);
1554 /* Normalized remainder. This is not currently correct in all cases. */
/* Computes *this - N * RHS, where N is the quotient *this / RHS rounded
   to an integer with rmNearestTiesToEven.  V is the working value (its
   declaration is not visible here; presumably a copy of *this --
   confirm).  The quotient is round-tripped through an integer buffer of
   partCount() parts to drop its fractional part.
   NOTE(review): x is allocated with new[]; neither its release nor the
   statements guarded by the opDivByZero / opInvalidOp checks are visible
   here -- verify that x is freed on every exit path.
   NOTE(review): the condition (if any) guarding the final restoration of
   the original sign is also not visible here. */
1556 APFloat::remainder(const APFloat &rhs)
1560 unsigned int origSign = sign;
1562 assertArithmeticOK(*semantics);
1563 fs = V.divide(rhs, rmNearestTiesToEven);
1564 if (fs == opDivByZero)
1567 int parts = partCount();
1568 integerPart *x = new integerPart[parts];
1570 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1571 rmNearestTiesToEven, &ignored);
1572 if (fs==opInvalidOp)
1575 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1576 rmNearestTiesToEven);
1577 assert(fs==opOK); // should always work
1579 fs = V.multiply(rhs, rmNearestTiesToEven);
1580 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1582 fs = subtract(V, rmNearestTiesToEven);
1583 assert(fs==opOK || fs==opInexact); // likewise
1586 sign = origSign; // IEEE754 requires this
1591 /* Normalized llvm frem (C fmod).
1592 This is not currently correct in all cases. */
/* modSpecials resolves the category first.  Only when both operands are
   fcNormal is the arithmetic path taken: it computes *this - N * RHS,
   where N is the quotient *this / RHS truncated toward zero
   (rmTowardZero in the integer conversion).  The final multiply and
   subtract use the caller's ROUNDING_MODE.  V is the working value (its
   declaration is not visible here; presumably a copy of *this --
   confirm).
   NOTE(review): x is allocated with new[]; neither its release nor the
   statements guarded by the opDivByZero / opInvalidOp checks are visible
   here -- verify that x is freed on every exit path. */
1594 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1597 assertArithmeticOK(*semantics);
1598 fs = modSpecials(rhs);
1600 if (category == fcNormal && rhs.category == fcNormal) {
1602 unsigned int origSign = sign;
1604 fs = V.divide(rhs, rmNearestTiesToEven);
1605 if (fs == opDivByZero)
1608 int parts = partCount();
1609 integerPart *x = new integerPart[parts];
1611 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1612 rmTowardZero, &ignored);
1613 if (fs==opInvalidOp)
1616 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1617 rmNearestTiesToEven);
1618 assert(fs==opOK); // should always work
1620 fs = V.multiply(rhs, rounding_mode);
1621 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1623 fs = subtract(V, rounding_mode);
1624 assert(fs==opOK || fs==opInexact); // likewise
1627 sign = origSign; // IEEE754 requires this
1633 /* Normalized fused-multiply-add. */
/* Computes *this * MULTIPLICAND + ADDEND, rounding with ROUNDING_MODE.
   The semantics must support arithmetic (asserted).  The product's sign
   is folded into `sign` up front.  When all three operands are fcNormal
   the addend is handed to multiplySignificand so the sum is formed
   before rounding; otherwise multiplySpecials is used and the addition
   is done separately through addOrSubtract (the conditions between
   those two calls are not visible here). */
1635 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1636 const APFloat &addend,
1637 roundingMode rounding_mode)
1641 assertArithmeticOK(*semantics);
1643 /* Post-multiplication sign, before addition. */
1644 sign ^= multiplicand.sign;
1646 /* If and only if all arguments are normal do we need to do an
1647 extended-precision calculation. */
1648 if(category == fcNormal
1649 && multiplicand.category == fcNormal
1650 && addend.category == fcNormal) {
1651 lostFraction lost_fraction;
1653 lost_fraction = multiplySignificand(multiplicand, &addend);
1654 fs = normalize(rounding_mode, lost_fraction);
1655 if(lost_fraction != lfExactlyZero)
1656 fs = (opStatus) (fs | opInexact);
1658 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1659 positive zero unless rounding to minus infinity, except that
1660 adding two like-signed zeroes gives that zero. */
1661 if(category == fcZero && sign != addend.sign)
1662 sign = (rounding_mode == rmTowardNegative);
1664 fs = multiplySpecials(multiplicand);
1666 /* FS can only be opOK or opInvalidOp. There is no more work
1667 to do in the latter case. The IEEE-754R standard says it is
1668 implementation-defined in this case whether, if ADDEND is a
1669 quiet NaN, we raise invalid op; this implementation does so.
1671 If we need to do the addition we can do so with normal
1674 fs = addOrSubtract(addend, rounding_mode, false);
1680 /* Comparison requires normalized numbers. */
/* Compares *this with RHS; both must share the same semantics (asserted)
   and those semantics must support arithmetic.  The result is selected
   by the pair of categories: any NaN operand gives cmpUnordered, and the
   mixed zero/normal/infinity pairs are decided by the operands' signs
   (several of the branch lines are not visible here).  Two normal
   numbers of equal sign are ordered by compareAbsoluteValue, with the
   less/greater outcome swapped when the values are negative. */
1682 APFloat::compare(const APFloat &rhs) const
1686 assertArithmeticOK(*semantics);
1687 assert(semantics == rhs.semantics);
1689 switch(convolve(category, rhs.category)) {
1693 case convolve(fcNaN, fcZero):
1694 case convolve(fcNaN, fcNormal):
1695 case convolve(fcNaN, fcInfinity):
1696 case convolve(fcNaN, fcNaN):
1697 case convolve(fcZero, fcNaN):
1698 case convolve(fcNormal, fcNaN):
1699 case convolve(fcInfinity, fcNaN):
1700 return cmpUnordered;
1702 case convolve(fcInfinity, fcNormal):
1703 case convolve(fcInfinity, fcZero):
1704 case convolve(fcNormal, fcZero):
1708 return cmpGreaterThan;
1710 case convolve(fcNormal, fcInfinity):
1711 case convolve(fcZero, fcInfinity):
1712 case convolve(fcZero, fcNormal):
1714 return cmpGreaterThan;
1718 case convolve(fcInfinity, fcInfinity):
1719 if(sign == rhs.sign)
1724 return cmpGreaterThan;
1726 case convolve(fcZero, fcZero):
1729 case convolve(fcNormal, fcNormal):
1733 /* Two normal numbers. Do they have the same sign? */
1734 if(sign != rhs.sign) {
1736 result = cmpLessThan;
1738 result = cmpGreaterThan;
1740 /* Compare absolute values; invert result if negative. */
1741 result = compareAbsoluteValue(rhs);
1744 if(result == cmpLessThan)
1745 result = cmpGreaterThan;
1746 else if(result == cmpGreaterThan)
1747 result = cmpLessThan;
1754 /// APFloat::convert - convert a value of one floating point type to another.
1755 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1756 /// records whether the transformation lost information, i.e. whether
1757 /// converting the result back to the original type will fail to produce the
1758 /// original value (this is almost the same as return value!=opOK, but there
1759 /// are edge cases where this is not so).
/// Both the current semantics and toSemantics must support arithmetic
/// (asserted).  rounding_mode is used when a finite value is normalized
/// into the target format; NaN payloads are shifted, not rounded.
1762 APFloat::convert(const fltSemantics &toSemantics,
1763 roundingMode rounding_mode, bool *losesInfo)
1765 lostFraction lostFraction;
1766 unsigned int newPartCount, oldPartCount;
1769 assertArithmeticOK(*semantics);
1770 assertArithmeticOK(toSemantics);
1771 lostFraction = lfExactlyZero;
1772 newPartCount = partCountForBits(toSemantics.precision + 1);
1773 oldPartCount = partCount();
1775 /* Handle storage complications. If our new form is wider,
1776 re-allocate our bit pattern into wider storage. If it is
1777 narrower, we ignore the excess parts, but if narrowing to a
1778 single part we need to free the old storage.
1779 Be careful not to reference significandParts for zeroes
1780 and infinities, since it aborts. */
1781 if (newPartCount > oldPartCount) {
1782 integerPart *newParts;
1783 newParts = new integerPart[newPartCount];
1784 APInt::tcSet(newParts, 0, newPartCount);
1785 if (category==fcNormal || category==fcNaN)
1786 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1788 significand.parts = newParts;
1789 } else if (newPartCount < oldPartCount) {
1790 /* Capture any lost fraction through truncation of parts so we get
1791 correct rounding whilst normalizing. */
1792 if (category==fcNormal)
1793 lostFraction = lostFractionThroughTruncation
1794 (significandParts(), oldPartCount, toSemantics.precision);
1795 if (newPartCount == 1) {
1796 integerPart newPart = 0;
1797 if (category==fcNormal || category==fcNaN)
1798 newPart = significandParts()[0];
1800 significand.part = newPart;
1804 if(category == fcNormal) {
1805 /* Re-interpret our bit-pattern. */
1806 exponent += toSemantics.precision - semantics->precision;
1807 semantics = &toSemantics;
1808 fs = normalize(rounding_mode, lostFraction);
1809 *losesInfo = (fs != opOK);
1810 } else if (category == fcNaN) {
1811 int shift = toSemantics.precision - semantics->precision;
1812 // Do this now so significandParts gets the right answer
1813 const fltSemantics *oldSemantics = semantics;
1814 semantics = &toSemantics;
1816 // No normalization here, just truncate
1818 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1819 else if (shift < 0) {
1820 unsigned ushift = -shift;
1821 // Figure out if we are losing information. This happens
1822 // if we are shifting out something other than 0s, or if the x87 long
1823 // double input did not have its integer bit set (pseudo-NaN), or if the
1824 // x87 long double input did not have its QNan bit set (because the x87
1825 // hardware sets this bit when converting a lower-precision NaN to
1826 // x87 long double).
1827 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1829 if (oldSemantics == &APFloat::x87DoubleExtended &&
1830 (!(*significandParts() & 0x8000000000000000ULL) ||
1831 !(*significandParts() & 0x4000000000000000ULL)))
1833 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1835 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1836 // does not give you back the same bits. This is dubious, and we
1837 // don't currently do it. You're really supposed to get
1838 // an invalid operation signal at runtime, but nobody does that.
1841 semantics = &toSemantics;
1849 /* Convert a floating point number to an integer according to the
1850 rounding mode. If the rounded integer value is out of range this
1851 returns an invalid operation exception and the contents of the
1852 destination parts are unspecified. If the rounded value is in
1853 range but the floating point number is not the exact integer, the C
1854 standard doesn't require an inexact exception to be raised. IEEE
1855 854 does require it so we do that.
1857 Note that for conversions to integer type the C standard requires
1858 round-to-zero to always be used. */
/* PARTS receives the result and is treated as partCountForBits(WIDTH)
   parts wide.  The semantics must support arithmetic (asserted).
   Infinities and NaNs are rejected before anything is written; a zero
   value clears PARTS.  For other values the work proceeds in the three
   numbered steps below: truncate, round, range-check.
   NOTE(review): the statements that store to *ISEXACT and most of the
   return statements are not visible here. */
1860 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1862 roundingMode rounding_mode,
1863 bool *isExact) const
1865 lostFraction lost_fraction;
1866 const integerPart *src;
1867 unsigned int dstPartsCount, truncatedBits;
1869 assertArithmeticOK(*semantics);
1873 /* Handle the three special cases first. */
1874 if(category == fcInfinity || category == fcNaN)
1877 dstPartsCount = partCountForBits(width);
1879 if(category == fcZero) {
1880 APInt::tcSet(parts, 0, dstPartsCount);
1881 // Negative zero can't be represented as an int.
1886 src = significandParts();
1888 /* Step 1: place our absolute value, with any fraction truncated, in
1891 /* Our absolute value is less than one; truncate everything. */
1892 APInt::tcSet(parts, 0, dstPartsCount);
1893 /* For exponent -1 the integer bit represents .5, look at that.
1894 For smaller exponents leftmost truncated bit is 0. */
1895 truncatedBits = semantics->precision -1U - exponent;
1897 /* We want the most significant (exponent + 1) bits; the rest are
1899 unsigned int bits = exponent + 1U;
1901 /* Hopelessly large in magnitude? */
1905 if (bits < semantics->precision) {
1906 /* We truncate (semantics->precision - bits) bits. */
1907 truncatedBits = semantics->precision - bits;
1908 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1910 /* We want at least as many bits as are available. */
1911 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1912 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1917 /* Step 2: work out any lost fraction, and increment the absolute
1918 value if we would round away from zero. */
1919 if (truncatedBits) {
1920 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1922 if (lost_fraction != lfExactlyZero
1923 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1924 if (APInt::tcIncrement(parts, dstPartsCount))
1925 return opInvalidOp; /* Overflow. */
1928 lost_fraction = lfExactlyZero;
1931 /* Step 3: check if we fit in the destination. */
1932 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1936 /* Negative numbers cannot be represented as unsigned. */
1940 /* It takes omsb bits to represent the unsigned integer value.
1941 We lose a bit for the sign, but care is needed as the
1942 maximally negative integer is a special case. */
1943 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1946 /* This case can happen because of rounding. */
1951 APInt::tcNegate (parts, dstPartsCount);
1953 if (omsb >= width + !isSigned)
1957 if (lost_fraction == lfExactlyZero) {
1964 /* Same as convertToSignExtendedInteger, except we provide
1965 deterministic values in case of an invalid operation exception,
1966 namely zero for NaNs and the minimal or maximal value respectively
1967 for underflow or overflow.
1968 The *isExact output tells whether the result is exact, in the sense
1969 that converting it back to the original floating point type produces
1970 the original value. This is almost equivalent to result==opOK,
1971 except for negative zeroes.
   The conversion itself is delegated to convertToSignExtendedInteger.
   Only when that reports opInvalidOp is PARTS rewritten here: a bit
   count is chosen (some of the branches selecting it are not visible
   here), that many least significant bits are set, and for a negative
   value with ISSIGNED the pattern is shifted left by WIDTH - 1.
1974 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1976 roundingMode rounding_mode, bool *isExact) const
1980 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
1983 if (fs == opInvalidOp) {
1984 unsigned int bits, dstPartsCount;
1986 dstPartsCount = partCountForBits(width);
1988 if (category == fcNaN)
1993 bits = width - isSigned;
1995 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
1996 if (sign && isSigned)
1997 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2003 /* Convert an unsigned integer SRC to a floating point number,
2004 rounding according to ROUNDING_MODE. The sign of the floating
2005 point number is not modified. */
/* SRC is SRCCOUNT parts wide.  The semantics must support arithmetic
   (asserted).  The category is set to fcNormal, the top bits of SRC are
   copied into the significand, and the returned status is whatever
   normalize reports for the resulting value and lost fraction. */
2007 APFloat::convertFromUnsignedParts(const integerPart *src,
2008 unsigned int srcCount,
2009 roundingMode rounding_mode)
2011 unsigned int omsb, precision, dstCount;
2013 lostFraction lost_fraction;
2015 assertArithmeticOK(*semantics);
2016 category = fcNormal;
2017 omsb = APInt::tcMSB(src, srcCount) + 1;
2018 dst = significandParts();
2019 dstCount = partCount();
2020 precision = semantics->precision;
2022 /* We want the most significant PRECISION bits of SRC. There may not
2023 be that many; extract what we can. */
2024 if (precision <= omsb) {
2025 exponent = omsb - 1;
2026 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2028 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2030 exponent = precision - 1;
2031 lost_fraction = lfExactlyZero;
2032 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2035 return normalize(rounding_mode, lost_fraction);
/* Convert the integer held in VAL to a floating point number, rounding
   according to ROUNDING_MODE.  The magnitude is converted by
   convertFromUnsignedParts using Val.getNumWords() parts.
   NOTE(review): `api` is declared on a line not visible here, and the
   body of the negative-value branch is also not visible; presumably it
   records the sign and negates `api` -- confirm. */
2039 APFloat::convertFromAPInt(const APInt &Val,
2041 roundingMode rounding_mode)
2043 unsigned int partCount = Val.getNumWords();
2047 if (isSigned && api.isNegative()) {
2052 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2055 /* Convert a two's complement integer SRC to a floating point number,
2056 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2057 integer is signed, in which case it must be sign-extended. */
/* SRC is SRCCOUNT parts wide; negativity is decided by the top bit of
   the highest part.  The semantics must support arithmetic (asserted).
   A negative input is negated in a scratch copy before conversion so
   that SRC itself is never modified.
   NOTE(review): `copy` is allocated with new[]; its release and the
   statements that set the sign are not visible here -- confirm. */
2059 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2060 unsigned int srcCount,
2062 roundingMode rounding_mode)
2066 assertArithmeticOK(*semantics);
2068 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2071 /* If we're signed and negative negate a copy. */
2073 copy = new integerPart[srcCount];
2074 APInt::tcAssign(copy, src, srcCount);
2075 APInt::tcNegate(copy, srcCount);
2076 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2080 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2086 /* FIXME: should this just take a const APInt reference? */
/* Convert the WIDTH-bit integer stored in PARTS to a floating point
   number, rounding according to ROUNDING_MODE.  The bits are wrapped in
   an APInt of that width; with ISSIGNED, bit WIDTH - 1 decides whether
   the value is negative.  The magnitude is converted by
   convertFromUnsignedParts.
   NOTE(review): the body of the negative-value branch is not visible
   here; presumably it records the sign and negates `api` -- confirm. */
2088 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2089 unsigned int width, bool isSigned,
2090 roundingMode rounding_mode)
2092 unsigned int partCount = partCountForBits(width);
2093 APInt api = APInt(width, partCount, parts);
2096 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
2101 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
/* Parse the hexadecimal significand and exponent at P (the caller has
   already consumed any "0x" prefix) into *this, rounding according to
   ROUNDING_MODE.  Digits are written into the significand one nibble at
   a time starting from its most significant end; digits that no longer
   fit are summarized by trailingHexadecimalFraction.  A 'p' or 'P'
   exponent marker is required (asserted).  The status returned is that
   of the final normalize call. */
2105 APFloat::convertFromHexadecimalString(const char *p,
2106 roundingMode rounding_mode)
2108 lostFraction lost_fraction;
2109 integerPart *significand;
2110 unsigned int bitPos, partsCount;
2111 const char *dot, *firstSignificantDigit;
2115 category = fcNormal;
2117 significand = significandParts();
2118 partsCount = partCount();
2119 bitPos = partsCount * integerPartWidth;
2121 /* Skip leading zeroes and any (hexa)decimal point. */
2122 p = skipLeadingZeroesAndAnyDot(p, &dot);
2123 firstSignificantDigit = p;
2126 integerPart hex_value;
2133 hex_value = hexDigitValue(*p);
2134 if(hex_value == -1U) {
2135 lost_fraction = lfExactlyZero;
2141 /* Store the number whilst 4-bit nibbles remain. */
2144 hex_value <<= bitPos % integerPartWidth;
2145 significand[bitPos / integerPartWidth] |= hex_value;
2147 lost_fraction = trailingHexadecimalFraction(p, hex_value);
2148 while(hexDigitValue(*p) != -1U)
2154 /* Hex floats require an exponent but not a hexadecimal point. */
2155 assert(*p == 'p' || *p == 'P');
2157 /* Ignore the exponent if we are zero. */
2158 if(p != firstSignificantDigit) {
2161 /* Implicit hexadecimal point? */
2165 /* Calculate the exponent adjustment implicit in the number of
2166 significant digits. */
2167 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2168 if(expAdjustment < 0)
2170 expAdjustment = expAdjustment * 4 - 1;
2172 /* Adjust for writing the significand starting at the most
2173 significant nibble. */
2174 expAdjustment += semantics->precision;
2175 expAdjustment -= partsCount * integerPartWidth;
2177 /* Adjust for the given exponent. */
2178 exponent = totalExponent(p, expAdjustment);
2181 return normalize(rounding_mode, lost_fraction);
/* Set *this to the decimal value DECSIGPARTS * 10^EXP, rounded according
   to ROUNDING_MODE.  DECSIGPARTS is an unsigned integer of SIGPARTCOUNT
   parts.  Since 10^n = 5^n * 2^n, the significand is multiplied or
   divided by 5^|EXP| in a wider scratch format (calcSemantics) and EXP is
   added to the binary exponent.  The scratch width is doubled on each
   pass of the loop until the accumulated error bound (in half-ulps) is
   no larger than the distance to the nearest rounding boundary, at which
   point truncating and normalizing is guaranteed to round correctly. */
2185 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2186 unsigned sigPartCount, int exp,
2187 roundingMode rounding_mode)
2189 unsigned int parts, pow5PartCount;
2190 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2191 integerPart pow5Parts[maxPowerOfFiveParts];
2194 isNearest = (rounding_mode == rmNearestTiesToEven
2195 || rounding_mode == rmNearestTiesToAway);
2197 parts = partCountForBits(semantics->precision + 11);
2199 /* Calculate pow(5, abs(exp)). */
2200 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2202 for (;; parts *= 2) {
2203 opStatus sigStatus, powStatus;
2204 unsigned int excessPrecision, truncatedBits;
2206 calcSemantics.precision = parts * integerPartWidth - 1;
2207 excessPrecision = calcSemantics.precision - semantics->precision;
2208 truncatedBits = excessPrecision;
2210 APFloat decSig(calcSemantics, fcZero, sign);
2211 APFloat pow5(calcSemantics, fcZero, false);
2213 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2214 rmNearestTiesToEven);
2215 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2216 rmNearestTiesToEven);
2217 /* Add exp, as 10^n = 5^n * 2^n. */
2218 decSig.exponent += exp;
2220 lostFraction calcLostFraction;
2221 integerPart HUerr, HUdistance;
2222 unsigned int powHUerr;
2225 /* multiplySignificand leaves the precision-th bit set to 1. */
2226 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2227 powHUerr = powStatus != opOK;
2229 calcLostFraction = decSig.divideSignificand(pow5);
2230 /* Denormal numbers have less precision. */
2231 if (decSig.exponent < semantics->minExponent) {
2232 excessPrecision += (semantics->minExponent - decSig.exponent);
2233 truncatedBits = excessPrecision;
2234 if (excessPrecision > calcSemantics.precision)
2235 excessPrecision = calcSemantics.precision;
2237 /* Extra half-ulp lost in reciprocal of exponent. */
2238 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2241 /* Both multiplySignificand and divideSignificand return the
2242 result with the integer bit set. */
2243 assert (APInt::tcExtractBit
2244 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2246 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2248 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2249 excessPrecision, isNearest);
2251 /* Are we guaranteed to round correctly if we truncate? */
2252 if (HUdistance >= HUerr) {
2253 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2254 calcSemantics.precision - excessPrecision,
2256 /* Take the exponent of decSig. If we tcExtract-ed less bits
2257 above we must adjust our exponent to compensate for the
2258 implicit right shift. */
2259 exponent = (decSig.exponent + semantics->precision
2260 - (calcSemantics.precision - excessPrecision));
2261 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2264 return normalize(rounding_mode, calcLostFraction);
/* Parse the decimal number at P into *this, rounding according to
   ROUNDING_MODE.  interpretDecimal scans the text into D first.  Inputs
   with no significant digits, or whose exponent is certain to underflow
   or overflow, are settled by the quick checks below.  Everything else
   has its digit string converted to a binary integer in a heap buffer
   (released before returning) and handed to
   roundSignificandWithExponent together with D.exponent. */
2270 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
2275 /* Scan the text. */
2276 interpretDecimal(p, &D);
2278 /* Handle the quick cases. First the case of no significant digits,
2279 i.e. zero, and then exponents that are obviously too large or too
2280 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2281 definitely overflows if
2283 (exp - 1) * L >= maxExponent
2285 and definitely underflows to zero where
2287 (exp + 1) * L <= minExponent - precision
2289 With integer arithmetic the tightest bounds for L are
2291 93/28 < L < 196/59 [ numerator <= 256 ]
2292 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2295 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2298 } else if ((D.normalizedExponent + 1) * 28738
2299 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2300 /* Underflow to zero and round. */
2302 fs = normalize(rounding_mode, lfLessThanHalf);
2303 } else if ((D.normalizedExponent - 1) * 42039
2304 >= 12655 * semantics->maxExponent) {
2305 /* Overflow and round. */
2306 fs = handleOverflow(rounding_mode);
2308 integerPart *decSignificand;
2309 unsigned int partCount;
2311 /* A tight upper bound on number of bits required to hold an
2312 N-digit decimal integer is N * 196 / 59. Allocate enough space
2313 to hold the full significand, and an extra part required by
2315 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2316 partCount = partCountForBits(1 + 196 * partCount / 59);
2317 decSignificand = new integerPart[partCount + 1];
2320 /* Convert to binary efficiently - we do almost all multiplication
2321 in an integerPart. When this would overflow we do a single
2322 bignum multiplication, and then revert again to multiplication
2323 in an integerPart. */
2325 integerPart decValue, val, multiplier;
2334 decValue = decDigitValue(*p++);
2336 val = val * 10 + decValue;
2337 /* The maximum number that can be multiplied by ten with any
2338 digit added without overflowing an integerPart. */
2339 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2341 /* Multiply out the current part. */
2342 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2343 partCount, partCount + 1, false);
2345 /* If we used another part (likely but not guaranteed), increase
2347 if (decSignificand[partCount])
2349 } while (p <= D.lastSigDigit);
2351 category = fcNormal;
2352 fs = roundSignificandWithExponent(decSignificand, partCount,
2353 D.exponent, rounding_mode);
2355 delete [] decSignificand;
/* Parse the textual number at P into *this, rounding according to
   ROUNDING_MODE.  The semantics must support arithmetic (asserted).
   After the sign handling, text beginning with "0x" or "0X" is passed
   (minus that prefix) to convertFromHexadecimalString; anything else
   goes to convertFromDecimalString.  The callee's status is returned. */
2362 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2364 assertArithmeticOK(*semantics);
2366 /* Handle a leading minus sign. */
2372 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2373 return convertFromHexadecimalString(p + 2, rounding_mode);
2375 return convertFromDecimalString(p, rounding_mode);
2378 /* Write out a hexadecimal representation of the floating point value
2379 to DST, which must be of sufficient size, in the C99 form
2380 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2381 excluding the terminating NUL.
2383 If UPPERCASE, the output is in upper case, otherwise in lower case.
2385 HEXDIGITS digits appear altogether, rounding the value if
2386 necessary. If HEXDIGITS is 0, the minimal precision to display the
2387 number precisely is used instead. If nothing would appear after
2388 the decimal point it is suppressed.
2390 The decimal exponent is always printed and has at least one digit.
2391 Zero values display an exponent of zero. Infinities and NaNs
2392 appear as "infinity" or "nan" respectively.
2394 The above rules are as specified by C99. There is ambiguity about
2395 what the leading hexadecimal digit should be. This implementation
2396 uses whatever is necessary so that the exponent is displayed as
2397 stored. This implies the exponent will fall within the IEEE format
2398 range, and the leading hexadecimal digit will be 0 (for denormals),
2399 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2400 any other digits zero).
   The semantics must support arithmetic (asserted).  Normal values are
   formatted by convertNormalToHexString with ROUNDING_MODE; the count
   returned is the distance DST has advanced from its starting point.
2403 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2404 bool upperCase, roundingMode rounding_mode) const
2408 assertArithmeticOK(*semantics);
2416 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2417 dst += sizeof infinityL - 1;
2421 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2422 dst += sizeof NaNU - 1;
2427 *dst++ = upperCase ? 'X': 'x';
2429 if (hexDigits > 1) {
2431 memset (dst, '0', hexDigits - 1);
2432 dst += hexDigits - 1;
2434 *dst++ = upperCase ? 'P': 'p';
2439 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2445 return static_cast<unsigned int>(dst - p);
2448 /* Does the hard work of outputting the correctly rounded hexadecimal
2449 form of a normal floating point number with the specified number of
2450 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2451 digits necessary to print the value precisely is output. */
/* Output goes to DST; UPPERCASE selects the digit table and the case of
   the 'x' and 'p' markers; ROUNDING_MODE decides whether a truncated
   value is rounded away from zero.  The value returned is the result of
   writeSignedDecimal for the exponent.
   NOTE(review): presumably that is the position just past the last
   character written -- confirm against writeSignedDecimal. */
2453 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2455 roundingMode rounding_mode) const
2457 unsigned int count, valueBits, shift, partsCount, outputDigits;
2458 const char *hexDigitChars;
2459 const integerPart *significand;
2464 *dst++ = upperCase ? 'X': 'x';
2467 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2469 significand = significandParts();
2470 partsCount = partCount();
2472 /* +3 because the first digit only uses the single integer bit, so
2473 we have 3 virtual zero most-significant-bits. */
2474 valueBits = semantics->precision + 3;
2475 shift = integerPartWidth - valueBits % integerPartWidth;
2477 /* The natural number of digits required ignoring trailing
2478 insignificant zeroes. */
2479 outputDigits = (valueBits - significandLSB () + 3) / 4;
2481 /* hexDigits of zero means use the required number for the
2482 precision. Otherwise, see if we are truncating. If we are,
2483 find out if we need to round away from zero. */
2485 if (hexDigits < outputDigits) {
2486 /* We are dropping non-zero bits, so need to check how to round.
2487 "bits" is the number of dropped bits. */
2489 lostFraction fraction;
2491 bits = valueBits - hexDigits * 4;
2492 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2493 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2495 outputDigits = hexDigits;
2498 /* Write the digits consecutively, and start writing in the location
2499 of the hexadecimal point. We move the most significant digit
2500 left and add the hexadecimal point later. */
2503 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2505 while (outputDigits && count) {
2508 /* Put the most significant integerPartWidth bits in "part". */
2509 if (--count == partsCount)
2510 part = 0; /* An imaginary higher zero part. */
2512 part = significand[count] << shift;
2515 part |= significand[count - 1] >> (integerPartWidth - shift);
2517 /* Convert as much of "part" to hexdigits as we can. */
2518 unsigned int curDigits = integerPartWidth / 4;
2520 if (curDigits > outputDigits)
2521 curDigits = outputDigits;
2522 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2523 outputDigits -= curDigits;
2529 /* Note that hexDigitChars has a trailing '0'. */
2532 *q = hexDigitChars[hexDigitValue (*q) + 1];
2533 } while (*q == '0');
2536 /* Add trailing zeroes. */
2537 memset (dst, '0', outputDigits);
2538 dst += outputDigits;
2541 /* Move the most significant digit to before the point, and if there
2542 is something after the decimal point add it. This must come
2543 after rounding above. */
2550 /* Finally output the exponent. */
2551 *dst++ = upperCase ? 'P': 'p';
2553 return writeSignedDecimal (dst, exponent);
2556 // For good performance it is desirable for different APFloats
2557 // to produce different integers.
// Zero and infinity hash from the sign (shifted to a category-specific
// bit) combined with the precision; NaN uses a fixed bit combined with
// the precision and ignores the sign.  Any other value also folds in the
// exponent and XORs the low and high 32-bit halves of every significand
// part.  The ">>32" assumes integerPart is 64 bits wide.
2559 APFloat::getHashValue() const
2561 if (category==fcZero) return sign<<8 | semantics->precision ;
2562 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2563 else if (category==fcNaN) return 1<<10 | semantics->precision;
2565 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2566 const integerPart* p = significandParts();
2567 for (int i=partCount(); i>0; i--, p++)
2568 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2573 // Conversion from APFloat to/from host float/double. It may eventually be
2574 // possible to eliminate these and have everybody deal with APFloats, but that
2575 // will take a while. This approach will not easily extend to long double.
2576 // Current implementation requires integerPartWidth==64, which is correct at
2577 // the moment but could be made more general.
2579 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2580 // the actual IEEE representations. We compensate for that here.
// Pack an x87DoubleExtended value into an 80-bit APInt built from two
// 64-bit words.  Only valid for x87DoubleExtended semantics with a
// two-part significand (both asserted).  The exponent is biased by 16383;
// a value whose biased exponent is 1 but whose explicit integer bit is
// clear is a denormal and is stored with exponent field 0.  Infinity and
// NaN use the all-ones exponent 0x7fff; infinity stores only the integer
// bit, NaN stores the significand as held.
2583 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2585 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2586 assert (partCount()==2);
2588 uint64_t myexponent, mysignificand;
2590 if (category==fcNormal) {
2591 myexponent = exponent+16383; //bias
2592 mysignificand = significandParts()[0];
2593 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2594 myexponent = 0; // denormal
2595 } else if (category==fcZero) {
2598 } else if (category==fcInfinity) {
2599 myexponent = 0x7fff;
2600 mysignificand = 0x8000000000000000ULL;
2602 assert(category == fcNaN && "Unknown category");
2603 myexponent = 0x7fff;
2604 mysignificand = significandParts()[0];
2608 words[0] = ((uint64_t)(sign & 1) << 63) |
2609 ((myexponent & 0x7fffLL) << 48) |
2610 ((mysignificand >>16) & 0xffffffffffffLL);
2611 words[1] = mysignificand & 0xffff;
2612 return APInt(80, 2, words);
// Pack a PPCDoubleDouble value into a 128-bit APInt built from two 64-bit
// words, each laid out as sign (bit 63), 11-bit exponent field and 52-bit
// fraction.  Only valid for PPCDoubleDouble semantics with a two-part
// significand (both asserted).  The first word comes from sign / exponent
// / significand part 0, the second from sign2 / exponent2 / significand
// part 1.  For normal values each exponent is biased by 1023 and a biased
// exponent of 1 with bit 52 of the significand clear is stored as 0
// (denormal).
// NOTE(review): in the NaN branch myexponent2 is taken from exponent2
// without adding the bias -- confirm this is intended.
2616 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2618 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2619 assert (partCount()==2);
2621 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2623 if (category==fcNormal) {
2624 myexponent = exponent + 1023; //bias
2625 myexponent2 = exponent2 + 1023;
2626 mysignificand = significandParts()[0];
2627 mysignificand2 = significandParts()[1];
2628 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2629 myexponent = 0; // denormal
2630 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2631 myexponent2 = 0; // denormal
2632 } else if (category==fcZero) {
2637 } else if (category==fcInfinity) {
2643 assert(category == fcNaN && "Unknown category");
2645 mysignificand = significandParts()[0];
2646 myexponent2 = exponent2;
2647 mysignificand2 = significandParts()[1];
2651 words[0] = ((uint64_t)(sign & 1) << 63) |
2652 ((myexponent & 0x7ff) << 52) |
2653 (mysignificand & 0xfffffffffffffLL);
2654 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2655 ((myexponent2 & 0x7ff) << 52) |
2656 (mysignificand2 & 0xfffffffffffffLL);
2657 return APInt(128, 2, words);
// Pack an IEEEdouble value into a 64-bit APInt: sign in bit 63, 11-bit
// exponent field, 52-bit fraction.  Only valid for IEEEdouble semantics
// with a single-part significand (both asserted).  For normal values the
// exponent is biased by 1023; a biased exponent of 1 with bit 52 of the
// significand clear is a denormal and is stored with exponent field 0.
// The assignments for the zero, infinity and NaN exponent fields are on
// lines not visible here.
2661 APFloat::convertDoubleAPFloatToAPInt() const
2663 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2664 assert (partCount()==1);
2666 uint64_t myexponent, mysignificand;
2668 if (category==fcNormal) {
2669 myexponent = exponent+1023; //bias
2670 mysignificand = *significandParts();
2671 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2672 myexponent = 0; // denormal
2673 } else if (category==fcZero) {
2676 } else if (category==fcInfinity) {
2680 assert(category == fcNaN && "Unknown category!");
2682 mysignificand = *significandParts();
2685 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2686 ((myexponent & 0x7ff) << 52) |
2687 (mysignificand & 0xfffffffffffffLL))));
// Pack an IEEEsingle value into a 32-bit APInt: sign in bit 31, 8-bit
// exponent field, 23-bit fraction.  Only valid for IEEEsingle semantics
// with a single-part significand (both asserted).  For normal values the
// exponent is biased by 127; a biased exponent of 1 with bit 23 of the
// significand clear is a denormal and is stored with exponent field 0.
// The assignments for the zero, infinity and NaN exponent fields are on
// lines not visible here.
2691 APFloat::convertFloatAPFloatToAPInt() const
2693 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2694 assert (partCount()==1);
2696 uint32_t myexponent, mysignificand;
2698 if (category==fcNormal) {
2699 myexponent = exponent+127; //bias
2700 mysignificand = (uint32_t)*significandParts();
2701 if (myexponent == 1 && !(mysignificand & 0x800000))
2702 myexponent = 0; // denormal
2703 } else if (category==fcZero) {
2706 } else if (category==fcInfinity) {
2710 assert(category == fcNaN && "Unknown category!");
2712 mysignificand = (uint32_t)*significandParts();
2715 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2716 (mysignificand & 0x7fffff)));
2719 // This function creates an APInt that is just a bit map of the floating
2720 // point constant as it would appear in memory. It is not a conversion,
2721 // and treating the result as a normal integer is unlikely to be useful.
//
// The work is delegated to the per-format helper selected by comparing
// the semantics pointer against each known format in turn.
2724 APFloat::bitcastToAPInt() const
// IEEE single precision.
2726 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2727 return convertFloatAPFloatToAPInt();
// IEEE double precision.
2729 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2730 return convertDoubleAPFloatToAPInt();
// PowerPC double-double.
2732 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2733 return convertPPCDoubleDoubleAPFloatToAPInt();
// Anything else is asserted to be x87 double-extended and handled by the
// F80 helper.
2735 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2737 return convertF80LongDoubleAPFloatToAPInt();
// Returns this value via APInt::bitsToFloat() applied to its bit pattern.
// The semantics are asserted to be IEEEsingle.
2741 APFloat::convertToFloat() const
2743 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
// Obtain the raw bit pattern, then let APInt reinterpret it.
2744 APInt api = bitcastToAPInt();
2745 return api.bitsToFloat();
// Returns this value via APInt::bitsToDouble() applied to its bit pattern.
// The semantics are asserted to be IEEEdouble.
2749 APFloat::convertToDouble() const
2751 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
// Obtain the raw bit pattern, then let APInt reinterpret it.
2752 APInt api = bitcastToAPInt();
2753 return api.bitsToDouble();
/// Initializes this value with x87DoubleExtended semantics from an 80-bit
/// APInt holding the raw encoding.
///
2756 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2757 /// does not support these bit patterns:
2758 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2759 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2760 /// exponent = 0, integer bit 1 ("pseudodenormal")
2761 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2762 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2764 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2766 assert(api.getBitWidth()==80);
// The encoding is read as two raw 64-bit words.
2767 uint64_t i1 = api.getRawData()[0];
2768 uint64_t i2 = api.getRawData()[1];
// The 15-bit exponent field sits in bits 48-62 of the first word.
2769 uint64_t myexponent = (i1 >> 48) & 0x7fff;
// The low 48 bits of the first word become the top 48 significand bits.
// NOTE(review): the right-hand operand of this `|` is not visible here;
// presumably the low 16 bits come from i2 -- confirm.
2770 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
2773 initialize(&APFloat::x87DoubleExtended);
2774 assert(partCount()==2);
// The sign is bit 63 of the first word.
2776 sign = static_cast<unsigned int>(i1>>63);
// Classify by exponent field and significand.
2777 if (myexponent==0 && mysignificand==0) {
2778 // exponent, significand meaningless
// NOTE(review): the category assigned for this all-zero pattern is not
// visible here -- confirm.
2780 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2781 // exponent, significand meaningless
2782 category = fcInfinity;
2783 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2784 // exponent meaningless
// NOTE(review): the category assigned here is not visible; the header
// comment indicates these patterns are treated as NaN -- confirm.
// The significand is stored in part 0 and part 1 is cleared.
2786 significandParts()[0] = mysignificand;
2787 significandParts()[1] = 0;
// Everything else is a normal number: remove the bias of 16383 and store
// the significand, explicit integer bit included, unchanged.
2789 category = fcNormal;
2790 exponent = myexponent - 16383;
2791 significandParts()[0] = mysignificand;
2792 significandParts()[1] = 0;
// NOTE(review): the statement controlled by this test is not visible
// here -- confirm how the exponent is adjusted for denormals.
2793 if (myexponent==0) // denormal
// Initializes this value with PPCDoubleDouble semantics from a 128-bit
// APInt.  The two raw 64-bit words are decoded independently, each with
// the sign in bit 63, an 11-bit exponent field in bits 52-62 and a 52-bit
// fraction.  The category is decided from the first word only.
2799 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2801 assert(api.getBitWidth()==128);
2802 uint64_t i1 = api.getRawData()[0];
2803 uint64_t i2 = api.getRawData()[1];
// Split each word into its exponent field and fraction.
2804 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2805 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2806 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2807 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2809 initialize(&APFloat::PPCDoubleDouble);
2810 assert(partCount()==2);
// Each word carries its own sign bit.
2812 sign = static_cast<unsigned int>(i1>>63);
2813 sign2 = static_cast<unsigned int>(i2>>63);
2814 if (myexponent==0 && mysignificand==0) {
2815 // exponent, significand meaningless
2816 // exponent2 and significand2 are required to be 0; we don't check
// NOTE(review): the category assigned for this all-zero first word is not
// visible here -- confirm.
2818 } else if (myexponent==0x7ff && mysignificand==0) {
2819 // exponent, significand meaningless
2820 // exponent2 and significand2 are required to be 0; we don't check
2821 category = fcInfinity;
2822 } else if (myexponent==0x7ff && mysignificand!=0) {
2823 // exponent meaningless. So is the whole second word, but keep it
// NOTE(review): the category assigned here is not visible -- confirm.
// The second word's exponent field is kept as read (no bias removed) and
// both fractions are stored.
2826 exponent2 = myexponent2;
2827 significandParts()[0] = mysignificand;
2828 significandParts()[1] = mysignificand2;
2830 category = fcNormal;
2831 // Note there is no category2; the second word is treated as if it is
2832 // fcNormal, although it might be something else considered by itself.
// Remove the bias of 1023 from both exponent fields.
2833 exponent = myexponent - 1023;
2834 exponent2 = myexponent2 - 1023;
2835 significandParts()[0] = mysignificand;
2836 significandParts()[1] = mysignificand2;
// NOTE(review): the statements controlled by this denormal test, and the
// conditions guarding the two integer-bit ORs below, are not visible
// here.  Presumably bit 52 is set only for non-denormal words -- confirm
// against the complete function.
2837 if (myexponent==0) // denormal
2840 significandParts()[0] |= 0x10000000000000LL; // integer bit
2844 significandParts()[1] |= 0x10000000000000LL; // integer bit
// Initializes this value with IEEEdouble semantics from a 64-bit APInt:
// sign in bit 63, an 11-bit exponent field in bits 52-62 and a 52-bit
// fraction.
2849 APFloat::initFromDoubleAPInt(const APInt &api)
2851 assert(api.getBitWidth()==64);
2852 uint64_t i = *api.getRawData();
// Split the word into its exponent field and fraction.
2853 uint64_t myexponent = (i >> 52) & 0x7ff;
2854 uint64_t mysignificand = i & 0xfffffffffffffLL;
2856 initialize(&APFloat::IEEEdouble);
2857 assert(partCount()==1);
2859 sign = static_cast<unsigned int>(i>>63);
// Classify by exponent field and fraction.
2860 if (myexponent==0 && mysignificand==0) {
2861 // exponent, significand meaningless
// NOTE(review): the category assigned for this all-zero pattern is not
// visible here -- confirm.
2863 } else if (myexponent==0x7ff && mysignificand==0) {
2864 // exponent, significand meaningless
2865 category = fcInfinity;
2866 } else if (myexponent==0x7ff && mysignificand!=0) {
2867 // exponent meaningless
// NOTE(review): the category assigned here is not visible -- confirm.
// The fraction is stored as the significand.
2869 *significandParts() = mysignificand;
// Everything else is a normal number: remove the bias of 1023.
2871 category = fcNormal;
2872 exponent = myexponent - 1023;
2873 *significandParts() = mysignificand;
// NOTE(review): the statement controlled by this denormal test, and the
// condition guarding the integer-bit OR below, are not visible here.
// Presumably bit 52 is set only for non-denormal values -- confirm.
2874 if (myexponent==0) // denormal
2877 *significandParts() |= 0x10000000000000LL; // integer bit
// Initializes this value with IEEEsingle semantics from a 32-bit APInt:
// an 8-bit exponent field in bits 23-30 and a 23-bit fraction.
// NOTE(review): the assignment of `sign` is not visible here -- confirm.
2882 APFloat::initFromFloatAPInt(const APInt & api)
2884 assert(api.getBitWidth()==32);
// The first raw word is truncated to 32 bits.
2885 uint32_t i = (uint32_t)*api.getRawData();
// Split the word into its exponent field and fraction.
2886 uint32_t myexponent = (i >> 23) & 0xff;
2887 uint32_t mysignificand = i & 0x7fffff;
2889 initialize(&APFloat::IEEEsingle);
2890 assert(partCount()==1);
// Classify by exponent field and fraction.
2893 if (myexponent==0 && mysignificand==0) {
2894 // exponent, significand meaningless
// NOTE(review): the category assigned for this all-zero pattern is not
// visible here -- confirm.
2896 } else if (myexponent==0xff && mysignificand==0) {
2897 // exponent, significand meaningless
2898 category = fcInfinity;
2899 } else if (myexponent==0xff && mysignificand!=0) {
2900 // sign, exponent, significand meaningless
// NOTE(review): the category assigned here is not visible -- confirm.
// The fraction is nevertheless stored as the significand.
2902 *significandParts() = mysignificand;
// Everything else is a normal number: remove the bias of 127.
2904 category = fcNormal;
2905 exponent = myexponent - 127; //bias
2906 *significandParts() = mysignificand;
// NOTE(review): the statement controlled by this denormal test, and the
// condition guarding the integer-bit OR below, are not visible here.
// Presumably bit 23 is set only for non-denormal values -- confirm.
2907 if (myexponent==0) // denormal
2910 *significandParts() |= 0x800000; // integer bit
2914 /// Treat api as containing the bits of a floating point number. Currently
2915 /// we infer the floating point type from the size of the APInt. The
2916 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2917 /// when the size is anything else).
///
/// Dispatches on the bit width: 32 -> IEEE single, 64 -> IEEE double,
/// 80 -> x87 double-extended, 128 with isIEEE false -> PPC double-double.
/// NOTE(review): the handling of any other width, or of 128 bits with
/// isIEEE true, is not visible here -- confirm.
2919 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2921 if (api.getBitWidth() == 32)
2922 return initFromFloatAPInt(api);
2923 else if (api.getBitWidth()==64)
2924 return initFromDoubleAPInt(api);
2925 else if (api.getBitWidth()==80)
2926 return initFromF80LongDoubleAPInt(api);
2927 else if (api.getBitWidth()==128 && !isIEEE)
2928 return initFromPPCDoubleDoubleAPInt(api);
// Constructs a value from a raw bit pattern by forwarding both arguments
// to initFromAPInt().
2933 APFloat::APFloat(const APInt& api, bool isIEEE)
2935 initFromAPInt(api, isIEEE);
// Constructs a value from a host float: a 32-bit APInt is created, the
// result of its floatToBits(f) is passed to initFromAPInt(), and the
// second argument of initFromAPInt() is left at its default.
2938 APFloat::APFloat(float f)
2940 APInt api = APInt(32, 0);
2941 initFromAPInt(api.floatToBits(f));
// Constructs a value from a host double: a 64-bit APInt is created, the
// result of its doubleToBits(d) is passed to initFromAPInt(), and the
// second argument of initFromAPInt() is left at its default.
2944 APFloat::APFloat(double d)
2946 APInt api = APInt(64, 0);
2947 initFromAPInt(api.doubleToBits(d));