1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/Support/MathExtras.h"
22 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
24 /* Assumed in hexadecimal significand parsing, and conversion to
25 hexadecimal strings. */
26 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
27 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
31 /* Represents floating point arithmetic semantics. */
33 /* The largest E such that 2^E is representable; this matches the
34 definition of IEEE 754. */
35 exponent_t maxExponent;
37 /* The smallest E such that 2^E is a normalized number; this
38 matches the definition of IEEE 754. */
39 exponent_t minExponent;
41 /* Number of bits in the significand. This includes the integer
43 unsigned int precision;
45 /* True if arithmetic is supported. */
46 unsigned int arithmeticOK;
49 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true };
50 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true };
51 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true };
52 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true };
53 const fltSemantics APFloat::Bogus = { 0, 0, 0, true };
55 // The PowerPC format consists of two doubles. It does not map cleanly
56 // onto the usual format above. For now only storage of constants of
57 // this type is supported, no arithmetic.
58 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false };
60 /* A tight upper bound on number of parts required to hold the value
63 power * 815 / (351 * integerPartWidth) + 1
65 However, whilst the result may require only this many parts,
66 because we are multiplying two values to get it, the
67 multiplication may require an extra part with the excess part
68 being zero (consider the trivial case of 1 * 1, tcFullMultiply
69 requires two parts to hold the single-part result). So we add an
70 extra one to guarantee enough space whilst multiplying. */
71 const unsigned int maxExponent = 16383;
72 const unsigned int maxPrecision = 113;
73 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
74 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
75 / (351 * integerPartWidth));
78 /* Put a bunch of private, handy routines in an anonymous namespace. */
81 static inline unsigned int
82 partCountForBits(unsigned int bits)
84 return ((bits) + integerPartWidth - 1) / integerPartWidth;
87 /* Returns 0U-9U. Return values >= 10U are not digits. */
88 static inline unsigned int
89 decDigitValue(unsigned int c)
95 hexDigitValue(unsigned int c)
115 assertArithmeticOK(const llvm::fltSemantics &semantics) {
116 assert(semantics.arithmeticOK
117 && "Compile-time arithmetic does not support these semantics");
121 isArithmeticOk(const llvm::fltSemantics &semantics) {
122 return semantics.arithmeticOK;
125 /* Return the value of a decimal exponent of the form
128 If the exponent overflows, returns a large exponent with the
131 readExponent(const char *p)
134 unsigned int absExponent;
135 const unsigned int overlargeExponent = 24000; /* FIXME. */
137 isNegative = (*p == '-');
138 if (*p == '-' || *p == '+')
141 absExponent = decDigitValue(*p++);
142 assert (absExponent < 10U);
147 value = decDigitValue(*p);
152 value += absExponent * 10;
153 if (absExponent >= overlargeExponent) {
154 absExponent = overlargeExponent;
161 return -(int) absExponent;
163 return (int) absExponent;
166 /* This is ugly and needs cleaning up, but I don't immediately see
167 how whilst remaining safe. */
169 totalExponent(const char *p, int exponentAdjustment)
171 int unsignedExponent;
172 bool negative, overflow;
175 /* Move past the exponent letter and sign to the digits. */
177 negative = *p == '-';
178 if(*p == '-' || *p == '+')
181 unsignedExponent = 0;
186 value = decDigitValue(*p);
191 unsignedExponent = unsignedExponent * 10 + value;
192 if(unsignedExponent > 65535)
196 if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
200 exponent = unsignedExponent;
202 exponent = -exponent;
203 exponent += exponentAdjustment;
204 if(exponent > 65535 || exponent < -65536)
209 exponent = negative ? -65536: 65535;
215 skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
230 /* Given a normal decimal floating point number of the form
234 where the decimal point and exponent are optional, fill out the
235 structure D. Exponent is appropriate if the significand is
236 treated as an integer, and normalizedExponent if the significand
237 is taken to have the decimal point after a single leading
240 If the value is zero, V->firstSigDigit points to a non-digit, and
241 the return exponent is zero.
244 const char *firstSigDigit;
245 const char *lastSigDigit;
247 int normalizedExponent;
251 interpretDecimal(const char *p, decimalInfo *D)
255 p = skipLeadingZeroesAndAnyDot (p, &dot);
257 D->firstSigDigit = p;
259 D->normalizedExponent = 0;
266 if (decDigitValue(*p) >= 10U)
271 /* If number is all zerooes accept any exponent. */
272 if (p != D->firstSigDigit) {
273 if (*p == 'e' || *p == 'E')
274 D->exponent = readExponent(p + 1);
276 /* Implied decimal point? */
280 /* Drop insignificant trailing zeroes. */
287 /* Adjust the exponents for any decimal point. */
288 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
289 D->normalizedExponent = (D->exponent +
290 static_cast<exponent_t>((p - D->firstSigDigit)
291 - (dot > D->firstSigDigit && dot < p)));
297 /* Return the trailing fraction of a hexadecimal number.
298 DIGITVALUE is the first hex digit of the fraction, P points to
301 trailingHexadecimalFraction(const char *p, unsigned int digitValue)
303 unsigned int hexDigit;
305 /* If the first trailing digit isn't 0 or 8 we can work out the
306 fraction immediately. */
308 return lfMoreThanHalf;
309 else if(digitValue < 8 && digitValue > 0)
310 return lfLessThanHalf;
312 /* Otherwise we need to find the first non-zero digit. */
316 hexDigit = hexDigitValue(*p);
318 /* If we ran off the end it is exactly zero or one-half, otherwise
321 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
323 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
326 /* Return the fraction lost were a bignum truncated losing the least
327 significant BITS bits. */
329 lostFractionThroughTruncation(const integerPart *parts,
330 unsigned int partCount,
335 lsb = APInt::tcLSB(parts, partCount);
337 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
339 return lfExactlyZero;
341 return lfExactlyHalf;
342 if(bits <= partCount * integerPartWidth
343 && APInt::tcExtractBit(parts, bits - 1))
344 return lfMoreThanHalf;
346 return lfLessThanHalf;
349 /* Shift DST right BITS bits noting lost fraction. */
351 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
353 lostFraction lost_fraction;
355 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
357 APInt::tcShiftRight(dst, parts, bits);
359 return lost_fraction;
362 /* Combine the effect of two lost fractions. */
364 combineLostFractions(lostFraction moreSignificant,
365 lostFraction lessSignificant)
367 if(lessSignificant != lfExactlyZero) {
368 if(moreSignificant == lfExactlyZero)
369 moreSignificant = lfLessThanHalf;
370 else if(moreSignificant == lfExactlyHalf)
371 moreSignificant = lfMoreThanHalf;
374 return moreSignificant;
377 /* The error from the true value, in half-ulps, on multiplying two
378 floating point numbers, which differ from the value they
379 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
380 than the returned value.
382 See "How to Read Floating Point Numbers Accurately" by William D
385 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
387 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
389 if (HUerr1 + HUerr2 == 0)
390 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
392 return inexactMultiply + 2 * (HUerr1 + HUerr2);
395 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
396 when the least significant BITS are truncated. BITS cannot be
399 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
401 unsigned int count, partBits;
402 integerPart part, boundary;
407 count = bits / integerPartWidth;
408 partBits = bits % integerPartWidth + 1;
410 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
413 boundary = (integerPart) 1 << (partBits - 1);
418 if (part - boundary <= boundary - part)
419 return part - boundary;
421 return boundary - part;
424 if (part == boundary) {
427 return ~(integerPart) 0; /* A lot. */
430 } else if (part == boundary - 1) {
433 return ~(integerPart) 0; /* A lot. */
438 return ~(integerPart) 0; /* A lot. */
441 /* Place pow(5, power) in DST, and return the number of parts used.
442 DST must be at least one part larger than size of the answer. */
444 powerOf5(integerPart *dst, unsigned int power)
446 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
448 static integerPart pow5s[maxPowerOfFiveParts * 2 + 5] = { 78125 * 5 };
449 static unsigned int partsCount[16] = { 1 };
451 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
454 assert(power <= maxExponent);
459 *p1 = firstEightPowers[power & 7];
465 for (unsigned int n = 0; power; power >>= 1, n++) {
470 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
472 pc = partsCount[n - 1];
473 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
475 if (pow5[pc - 1] == 0)
483 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
485 if (p2[result - 1] == 0)
488 /* Now result is in p1 with partsCount parts and p2 is scratch
490 tmp = p1, p1 = p2, p2 = tmp;
497 APInt::tcAssign(dst, p1, result);
502 /* Zero at the end to avoid modular arithmetic when adding one; used
503 when rounding up during hexadecimal output. */
504 static const char hexDigitsLower[] = "0123456789abcdef0";
505 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
506 static const char infinityL[] = "infinity";
507 static const char infinityU[] = "INFINITY";
508 static const char NaNL[] = "nan";
509 static const char NaNU[] = "NAN";
511 /* Write out an integerPart in hexadecimal, starting with the most
512 significant nibble. Write out exactly COUNT hexdigits, return
515 partAsHex (char *dst, integerPart part, unsigned int count,
516 const char *hexDigitChars)
518 unsigned int result = count;
520 assert (count != 0 && count <= integerPartWidth / 4);
522 part >>= (integerPartWidth - 4 * count);
524 dst[count] = hexDigitChars[part & 0xf];
531 /* Write out an unsigned decimal integer. */
533 writeUnsignedDecimal (char *dst, unsigned int n)
549 /* Write out a signed decimal integer. */
551 writeSignedDecimal (char *dst, int value)
555 dst = writeUnsignedDecimal(dst, -(unsigned) value);
557 dst = writeUnsignedDecimal(dst, value);
565 APFloat::initialize(const fltSemantics *ourSemantics)
569 semantics = ourSemantics;
572 significand.parts = new integerPart[count];
576 APFloat::freeSignificand()
579 delete [] significand.parts;
583 APFloat::assign(const APFloat &rhs)
585 assert(semantics == rhs.semantics);
588 category = rhs.category;
589 exponent = rhs.exponent;
591 exponent2 = rhs.exponent2;
592 if(category == fcNormal || category == fcNaN)
593 copySignificand(rhs);
597 APFloat::copySignificand(const APFloat &rhs)
599 assert(category == fcNormal || category == fcNaN);
600 assert(rhs.partCount() >= partCount());
602 APInt::tcAssign(significandParts(), rhs.significandParts(),
606 /* Make this number a NaN, with an arbitrary but deterministic value
607 for the significand. */
609 APFloat::makeNaN(void)
612 APInt::tcSet(significandParts(), ~0U, partCount());
616 APFloat::operator=(const APFloat &rhs)
619 if(semantics != rhs.semantics) {
621 initialize(rhs.semantics);
630 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
633 if (semantics != rhs.semantics ||
634 category != rhs.category ||
637 if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
640 if (category==fcZero || category==fcInfinity)
642 else if (category==fcNormal && exponent!=rhs.exponent)
644 else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble &&
645 exponent2!=rhs.exponent2)
649 const integerPart* p=significandParts();
650 const integerPart* q=rhs.significandParts();
651 for (; i>0; i--, p++, q++) {
659 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value)
661 assertArithmeticOK(ourSemantics);
662 initialize(&ourSemantics);
665 exponent = ourSemantics.precision - 1;
666 significandParts()[0] = value;
667 normalize(rmNearestTiesToEven, lfExactlyZero);
670 APFloat::APFloat(const fltSemantics &ourSemantics,
671 fltCategory ourCategory, bool negative)
673 assertArithmeticOK(ourSemantics);
674 initialize(&ourSemantics);
675 category = ourCategory;
677 if(category == fcNormal)
679 else if (ourCategory == fcNaN)
683 APFloat::APFloat(const fltSemantics &ourSemantics, const char *text)
685 assertArithmeticOK(ourSemantics);
686 initialize(&ourSemantics);
687 convertFromString(text, rmNearestTiesToEven);
690 APFloat::APFloat(const APFloat &rhs)
692 initialize(rhs.semantics);
701 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
702 void APFloat::Profile(FoldingSetNodeID& ID) const {
703 ID.Add(bitcastToAPInt());
707 APFloat::partCount() const
709 return partCountForBits(semantics->precision + 1);
713 APFloat::semanticsPrecision(const fltSemantics &semantics)
715 return semantics.precision;
719 APFloat::significandParts() const
721 return const_cast<APFloat *>(this)->significandParts();
725 APFloat::significandParts()
727 assert(category == fcNormal || category == fcNaN);
730 return significand.parts;
732 return &significand.part;
736 APFloat::zeroSignificand()
739 APInt::tcSet(significandParts(), 0, partCount());
742 /* Increment an fcNormal floating point number's significand. */
744 APFloat::incrementSignificand()
748 carry = APInt::tcIncrement(significandParts(), partCount());
750 /* Our callers should never cause us to overflow. */
754 /* Add the significand of the RHS. Returns the carry flag. */
756 APFloat::addSignificand(const APFloat &rhs)
760 parts = significandParts();
762 assert(semantics == rhs.semantics);
763 assert(exponent == rhs.exponent);
765 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
768 /* Subtract the significand of the RHS with a borrow flag. Returns
771 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
775 parts = significandParts();
777 assert(semantics == rhs.semantics);
778 assert(exponent == rhs.exponent);
780 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
784 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
785 on to the full-precision result of the multiplication. Returns the
788 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
790 unsigned int omsb; // One, not zero, based MSB.
791 unsigned int partsCount, newPartsCount, precision;
792 integerPart *lhsSignificand;
793 integerPart scratch[4];
794 integerPart *fullSignificand;
795 lostFraction lost_fraction;
798 assert(semantics == rhs.semantics);
800 precision = semantics->precision;
801 newPartsCount = partCountForBits(precision * 2);
803 if(newPartsCount > 4)
804 fullSignificand = new integerPart[newPartsCount];
806 fullSignificand = scratch;
808 lhsSignificand = significandParts();
809 partsCount = partCount();
811 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
812 rhs.significandParts(), partsCount, partsCount);
814 lost_fraction = lfExactlyZero;
815 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
816 exponent += rhs.exponent;
819 Significand savedSignificand = significand;
820 const fltSemantics *savedSemantics = semantics;
821 fltSemantics extendedSemantics;
823 unsigned int extendedPrecision;
825 /* Normalize our MSB. */
826 extendedPrecision = precision + precision - 1;
827 if(omsb != extendedPrecision)
829 APInt::tcShiftLeft(fullSignificand, newPartsCount,
830 extendedPrecision - omsb);
831 exponent -= extendedPrecision - omsb;
834 /* Create new semantics. */
835 extendedSemantics = *semantics;
836 extendedSemantics.precision = extendedPrecision;
838 if(newPartsCount == 1)
839 significand.part = fullSignificand[0];
841 significand.parts = fullSignificand;
842 semantics = &extendedSemantics;
844 APFloat extendedAddend(*addend);
845 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
846 assert(status == opOK);
847 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
849 /* Restore our state. */
850 if(newPartsCount == 1)
851 fullSignificand[0] = significand.part;
852 significand = savedSignificand;
853 semantics = savedSemantics;
855 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
858 exponent -= (precision - 1);
860 if(omsb > precision) {
861 unsigned int bits, significantParts;
864 bits = omsb - precision;
865 significantParts = partCountForBits(omsb);
866 lf = shiftRight(fullSignificand, significantParts, bits);
867 lost_fraction = combineLostFractions(lf, lost_fraction);
871 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
873 if(newPartsCount > 4)
874 delete [] fullSignificand;
876 return lost_fraction;
879 /* Multiply the significands of LHS and RHS to DST. */
881 APFloat::divideSignificand(const APFloat &rhs)
883 unsigned int bit, i, partsCount;
884 const integerPart *rhsSignificand;
885 integerPart *lhsSignificand, *dividend, *divisor;
886 integerPart scratch[4];
887 lostFraction lost_fraction;
889 assert(semantics == rhs.semantics);
891 lhsSignificand = significandParts();
892 rhsSignificand = rhs.significandParts();
893 partsCount = partCount();
896 dividend = new integerPart[partsCount * 2];
900 divisor = dividend + partsCount;
902 /* Copy the dividend and divisor as they will be modified in-place. */
903 for(i = 0; i < partsCount; i++) {
904 dividend[i] = lhsSignificand[i];
905 divisor[i] = rhsSignificand[i];
906 lhsSignificand[i] = 0;
909 exponent -= rhs.exponent;
911 unsigned int precision = semantics->precision;
913 /* Normalize the divisor. */
914 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
917 APInt::tcShiftLeft(divisor, partsCount, bit);
920 /* Normalize the dividend. */
921 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
924 APInt::tcShiftLeft(dividend, partsCount, bit);
927 /* Ensure the dividend >= divisor initially for the loop below.
928 Incidentally, this means that the division loop below is
929 guaranteed to set the integer bit to one. */
930 if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
932 APInt::tcShiftLeft(dividend, partsCount, 1);
933 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
937 for(bit = precision; bit; bit -= 1) {
938 if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
939 APInt::tcSubtract(dividend, divisor, 0, partsCount);
940 APInt::tcSetBit(lhsSignificand, bit - 1);
943 APInt::tcShiftLeft(dividend, partsCount, 1);
946 /* Figure out the lost fraction. */
947 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
950 lost_fraction = lfMoreThanHalf;
952 lost_fraction = lfExactlyHalf;
953 else if(APInt::tcIsZero(dividend, partsCount))
954 lost_fraction = lfExactlyZero;
956 lost_fraction = lfLessThanHalf;
961 return lost_fraction;
965 APFloat::significandMSB() const
967 return APInt::tcMSB(significandParts(), partCount());
971 APFloat::significandLSB() const
973 return APInt::tcLSB(significandParts(), partCount());
976 /* Note that a zero result is NOT normalized to fcZero. */
978 APFloat::shiftSignificandRight(unsigned int bits)
980 /* Our exponent should not overflow. */
981 assert((exponent_t) (exponent + bits) >= exponent);
985 return shiftRight(significandParts(), partCount(), bits);
988 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
990 APFloat::shiftSignificandLeft(unsigned int bits)
992 assert(bits < semantics->precision);
995 unsigned int partsCount = partCount();
997 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1000 assert(!APInt::tcIsZero(significandParts(), partsCount));
1005 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1009 assert(semantics == rhs.semantics);
1010 assert(category == fcNormal);
1011 assert(rhs.category == fcNormal);
1013 compare = exponent - rhs.exponent;
1015 /* If exponents are equal, do an unsigned bignum comparison of the
1018 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1022 return cmpGreaterThan;
1023 else if(compare < 0)
1029 /* Handle overflow. Sign is preserved. We either become infinity or
1030 the largest finite number. */
1032 APFloat::handleOverflow(roundingMode rounding_mode)
1035 if(rounding_mode == rmNearestTiesToEven
1036 || rounding_mode == rmNearestTiesToAway
1037 || (rounding_mode == rmTowardPositive && !sign)
1038 || (rounding_mode == rmTowardNegative && sign))
1040 category = fcInfinity;
1041 return (opStatus) (opOverflow | opInexact);
1044 /* Otherwise we become the largest finite number. */
1045 category = fcNormal;
1046 exponent = semantics->maxExponent;
1047 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1048 semantics->precision);
1053 /* Returns TRUE if, when truncating the current number, with BIT the
1054 new LSB, with the given lost fraction and rounding mode, the result
1055 would need to be rounded away from zero (i.e., by increasing the
1056 signficand). This routine must work for fcZero of both signs, and
1057 fcNormal numbers. */
1059 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1060 lostFraction lost_fraction,
1061 unsigned int bit) const
1063 /* NaNs and infinities should not have lost fractions. */
1064 assert(category == fcNormal || category == fcZero);
1066 /* Current callers never pass this so we don't handle it. */
1067 assert(lost_fraction != lfExactlyZero);
1069 switch(rounding_mode) {
1073 case rmNearestTiesToAway:
1074 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1076 case rmNearestTiesToEven:
1077 if(lost_fraction == lfMoreThanHalf)
1080 /* Our zeroes don't have a significand to test. */
1081 if(lost_fraction == lfExactlyHalf && category != fcZero)
1082 return APInt::tcExtractBit(significandParts(), bit);
1089 case rmTowardPositive:
1090 return sign == false;
1092 case rmTowardNegative:
1093 return sign == true;
1098 APFloat::normalize(roundingMode rounding_mode,
1099 lostFraction lost_fraction)
1101 unsigned int omsb; /* One, not zero, based MSB. */
1104 if(category != fcNormal)
1107 /* Before rounding normalize the exponent of fcNormal numbers. */
1108 omsb = significandMSB() + 1;
1111 /* OMSB is numbered from 1. We want to place it in the integer
1112 bit numbered PRECISON if possible, with a compensating change in
1114 exponentChange = omsb - semantics->precision;
1116 /* If the resulting exponent is too high, overflow according to
1117 the rounding mode. */
1118 if(exponent + exponentChange > semantics->maxExponent)
1119 return handleOverflow(rounding_mode);
1121 /* Subnormal numbers have exponent minExponent, and their MSB
1122 is forced based on that. */
1123 if(exponent + exponentChange < semantics->minExponent)
1124 exponentChange = semantics->minExponent - exponent;
1126 /* Shifting left is easy as we don't lose precision. */
1127 if(exponentChange < 0) {
1128 assert(lost_fraction == lfExactlyZero);
1130 shiftSignificandLeft(-exponentChange);
1135 if(exponentChange > 0) {
1138 /* Shift right and capture any new lost fraction. */
1139 lf = shiftSignificandRight(exponentChange);
1141 lost_fraction = combineLostFractions(lf, lost_fraction);
1143 /* Keep OMSB up-to-date. */
1144 if(omsb > (unsigned) exponentChange)
1145 omsb -= exponentChange;
1151 /* Now round the number according to rounding_mode given the lost
1154 /* As specified in IEEE 754, since we do not trap we do not report
1155 underflow for exact results. */
1156 if(lost_fraction == lfExactlyZero) {
1157 /* Canonicalize zeroes. */
1164 /* Increment the significand if we're rounding away from zero. */
1165 if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1167 exponent = semantics->minExponent;
1169 incrementSignificand();
1170 omsb = significandMSB() + 1;
1172 /* Did the significand increment overflow? */
1173 if(omsb == (unsigned) semantics->precision + 1) {
1174 /* Renormalize by incrementing the exponent and shifting our
1175 significand right one. However if we already have the
1176 maximum exponent we overflow to infinity. */
1177 if(exponent == semantics->maxExponent) {
1178 category = fcInfinity;
1180 return (opStatus) (opOverflow | opInexact);
1183 shiftSignificandRight(1);
1189 /* The normal case - we were and are not denormal, and any
1190 significand increment above didn't overflow. */
1191 if(omsb == semantics->precision)
1194 /* We have a non-zero denormal. */
1195 assert(omsb < semantics->precision);
1197 /* Canonicalize zeroes. */
1201 /* The fcZero case is a denormal that underflowed to zero. */
1202 return (opStatus) (opUnderflow | opInexact);
1206 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1208 switch(convolve(category, rhs.category)) {
1212 case convolve(fcNaN, fcZero):
1213 case convolve(fcNaN, fcNormal):
1214 case convolve(fcNaN, fcInfinity):
1215 case convolve(fcNaN, fcNaN):
1216 case convolve(fcNormal, fcZero):
1217 case convolve(fcInfinity, fcNormal):
1218 case convolve(fcInfinity, fcZero):
1221 case convolve(fcZero, fcNaN):
1222 case convolve(fcNormal, fcNaN):
1223 case convolve(fcInfinity, fcNaN):
1225 copySignificand(rhs);
1228 case convolve(fcNormal, fcInfinity):
1229 case convolve(fcZero, fcInfinity):
1230 category = fcInfinity;
1231 sign = rhs.sign ^ subtract;
1234 case convolve(fcZero, fcNormal):
1236 sign = rhs.sign ^ subtract;
1239 case convolve(fcZero, fcZero):
1240 /* Sign depends on rounding mode; handled by caller. */
1243 case convolve(fcInfinity, fcInfinity):
1244 /* Differently signed infinities can only be validly
1246 if((sign ^ rhs.sign) != subtract) {
1253 case convolve(fcNormal, fcNormal):
1258 /* Add or subtract two normal numbers. */
1260 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1263 lostFraction lost_fraction;
1266 /* Determine if the operation on the absolute values is effectively
1267 an addition or subtraction. */
1268 subtract ^= (sign ^ rhs.sign) ? true : false;
1270 /* Are we bigger exponent-wise than the RHS? */
1271 bits = exponent - rhs.exponent;
1273 /* Subtraction is more subtle than one might naively expect. */
1275 APFloat temp_rhs(rhs);
1279 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1280 lost_fraction = lfExactlyZero;
1281 } else if (bits > 0) {
1282 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1283 shiftSignificandLeft(1);
1286 lost_fraction = shiftSignificandRight(-bits - 1);
1287 temp_rhs.shiftSignificandLeft(1);
1292 carry = temp_rhs.subtractSignificand
1293 (*this, lost_fraction != lfExactlyZero);
1294 copySignificand(temp_rhs);
1297 carry = subtractSignificand
1298 (temp_rhs, lost_fraction != lfExactlyZero);
1301 /* Invert the lost fraction - it was on the RHS and
1303 if(lost_fraction == lfLessThanHalf)
1304 lost_fraction = lfMoreThanHalf;
1305 else if(lost_fraction == lfMoreThanHalf)
1306 lost_fraction = lfLessThanHalf;
1308 /* The code above is intended to ensure that no borrow is
1313 APFloat temp_rhs(rhs);
1315 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1316 carry = addSignificand(temp_rhs);
1318 lost_fraction = shiftSignificandRight(-bits);
1319 carry = addSignificand(rhs);
1322 /* We have a guard bit; generating a carry cannot happen. */
1326 return lost_fraction;
1330 APFloat::multiplySpecials(const APFloat &rhs)
1332 switch(convolve(category, rhs.category)) {
1336 case convolve(fcNaN, fcZero):
1337 case convolve(fcNaN, fcNormal):
1338 case convolve(fcNaN, fcInfinity):
1339 case convolve(fcNaN, fcNaN):
1342 case convolve(fcZero, fcNaN):
1343 case convolve(fcNormal, fcNaN):
1344 case convolve(fcInfinity, fcNaN):
1346 copySignificand(rhs);
1349 case convolve(fcNormal, fcInfinity):
1350 case convolve(fcInfinity, fcNormal):
1351 case convolve(fcInfinity, fcInfinity):
1352 category = fcInfinity;
1355 case convolve(fcZero, fcNormal):
1356 case convolve(fcNormal, fcZero):
1357 case convolve(fcZero, fcZero):
1361 case convolve(fcZero, fcInfinity):
1362 case convolve(fcInfinity, fcZero):
1366 case convolve(fcNormal, fcNormal):
1372 APFloat::divideSpecials(const APFloat &rhs)
1374 switch(convolve(category, rhs.category)) {
1378 case convolve(fcNaN, fcZero):
1379 case convolve(fcNaN, fcNormal):
1380 case convolve(fcNaN, fcInfinity):
1381 case convolve(fcNaN, fcNaN):
1382 case convolve(fcInfinity, fcZero):
1383 case convolve(fcInfinity, fcNormal):
1384 case convolve(fcZero, fcInfinity):
1385 case convolve(fcZero, fcNormal):
1388 case convolve(fcZero, fcNaN):
1389 case convolve(fcNormal, fcNaN):
1390 case convolve(fcInfinity, fcNaN):
1392 copySignificand(rhs);
1395 case convolve(fcNormal, fcInfinity):
1399 case convolve(fcNormal, fcZero):
1400 category = fcInfinity;
1403 case convolve(fcInfinity, fcInfinity):
1404 case convolve(fcZero, fcZero):
1408 case convolve(fcNormal, fcNormal):
1415 APFloat::changeSign()
1417 /* Look mummy, this one's easy. */
1422 APFloat::clearSign()
1424 /* So is this one. */
1429 APFloat::copySign(const APFloat &rhs)
1435 /* Normalized addition or subtraction. */
1437 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1442 assertArithmeticOK(*semantics);
1444 fs = addOrSubtractSpecials(rhs, subtract);
1446 /* This return code means it was not a simple case. */
1447 if(fs == opDivByZero) {
1448 lostFraction lost_fraction;
1450 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1451 fs = normalize(rounding_mode, lost_fraction);
1453 /* Can only be zero if we lost no fraction. */
1454 assert(category != fcZero || lost_fraction == lfExactlyZero);
1457 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1458 positive zero unless rounding to minus infinity, except that
1459 adding two like-signed zeroes gives that zero. */
1460 if(category == fcZero) {
1461 if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
1462 sign = (rounding_mode == rmTowardNegative);
1468 /* Normalized addition. */
1470 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1472 return addOrSubtract(rhs, rounding_mode, false);
1475 /* Normalized subtraction. */
1477 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1479 return addOrSubtract(rhs, rounding_mode, true);
1482 /* Normalized multiply. */
1484 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1488 assertArithmeticOK(*semantics);
1490 fs = multiplySpecials(rhs);
1492 if(category == fcNormal) {
1493 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1494 fs = normalize(rounding_mode, lost_fraction);
1495 if(lost_fraction != lfExactlyZero)
1496 fs = (opStatus) (fs | opInexact);
1502 /* Normalized divide. */
1504 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1508 assertArithmeticOK(*semantics);
1510 fs = divideSpecials(rhs);
1512 if(category == fcNormal) {
1513 lostFraction lost_fraction = divideSignificand(rhs);
1514 fs = normalize(rounding_mode, lost_fraction);
1515 if(lost_fraction != lfExactlyZero)
1516 fs = (opStatus) (fs | opInexact);
1522 /* Normalized remainder. This is not currently doing TRT. */
1524 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1528 unsigned int origSign = sign;
1530 assertArithmeticOK(*semantics);
1531 fs = V.divide(rhs, rmNearestTiesToEven);
1532 if (fs == opDivByZero)
1535 int parts = partCount();
1536 integerPart *x = new integerPart[parts];
1538 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1539 rmNearestTiesToEven, &ignored);
1540 if (fs==opInvalidOp)
1543 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1544 rmNearestTiesToEven);
1545 assert(fs==opOK); // should always work
1547 fs = V.multiply(rhs, rounding_mode);
1548 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1550 fs = subtract(V, rounding_mode);
1551 assert(fs==opOK || fs==opInexact); // likewise
1554 sign = origSign; // IEEE754 requires this
1559 /* Normalized fused-multiply-add. */
1561 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1562 const APFloat &addend,
1563 roundingMode rounding_mode)
1567 assertArithmeticOK(*semantics);
1569 /* Post-multiplication sign, before addition. */
1570 sign ^= multiplicand.sign;
1572 /* If and only if all arguments are normal do we need to do an
1573 extended-precision calculation. */
1574 if(category == fcNormal
1575 && multiplicand.category == fcNormal
1576 && addend.category == fcNormal) {
1577 lostFraction lost_fraction;
1579 lost_fraction = multiplySignificand(multiplicand, &addend);
1580 fs = normalize(rounding_mode, lost_fraction);
1581 if(lost_fraction != lfExactlyZero)
1582 fs = (opStatus) (fs | opInexact);
1584 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1585 positive zero unless rounding to minus infinity, except that
1586 adding two like-signed zeroes gives that zero. */
1587 if(category == fcZero && sign != addend.sign)
1588 sign = (rounding_mode == rmTowardNegative);
1590 fs = multiplySpecials(multiplicand);
1592 /* FS can only be opOK or opInvalidOp. There is no more work
1593 to do in the latter case. The IEEE-754R standard says it is
1594 implementation-defined in this case whether, if ADDEND is a
1595 quiet NaN, we raise invalid op; this implementation does so.
1597 If we need to do the addition we can do so with normal
1600 fs = addOrSubtract(addend, rounding_mode, false);
1606 /* Comparison requires normalized numbers. */
1608 APFloat::compare(const APFloat &rhs) const
1612 assertArithmeticOK(*semantics);
1613 assert(semantics == rhs.semantics);
1615 switch(convolve(category, rhs.category)) {
1619 case convolve(fcNaN, fcZero):
1620 case convolve(fcNaN, fcNormal):
1621 case convolve(fcNaN, fcInfinity):
1622 case convolve(fcNaN, fcNaN):
1623 case convolve(fcZero, fcNaN):
1624 case convolve(fcNormal, fcNaN):
1625 case convolve(fcInfinity, fcNaN):
1626 return cmpUnordered;
1628 case convolve(fcInfinity, fcNormal):
1629 case convolve(fcInfinity, fcZero):
1630 case convolve(fcNormal, fcZero):
1634 return cmpGreaterThan;
1636 case convolve(fcNormal, fcInfinity):
1637 case convolve(fcZero, fcInfinity):
1638 case convolve(fcZero, fcNormal):
1640 return cmpGreaterThan;
1644 case convolve(fcInfinity, fcInfinity):
1645 if(sign == rhs.sign)
1650 return cmpGreaterThan;
1652 case convolve(fcZero, fcZero):
1655 case convolve(fcNormal, fcNormal):
1659 /* Two normal numbers. Do they have the same sign? */
1660 if(sign != rhs.sign) {
1662 result = cmpLessThan;
1664 result = cmpGreaterThan;
1666 /* Compare absolute values; invert result if negative. */
1667 result = compareAbsoluteValue(rhs);
1670 if(result == cmpLessThan)
1671 result = cmpGreaterThan;
1672 else if(result == cmpGreaterThan)
1673 result = cmpLessThan;
1680 /// APFloat::convert - convert a value of one floating point type to another.
1681 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1682 /// records whether the transformation lost information, i.e. whether
1683 /// converting the result back to the original type will produce the
1684 /// original value (this is almost the same as return value==fsOK, but there
1685 /// are edge cases where this is not so).
1688 APFloat::convert(const fltSemantics &toSemantics,
1689 roundingMode rounding_mode, bool *losesInfo)
1691 lostFraction lostFraction;
1692 unsigned int newPartCount, oldPartCount;
1695 assertArithmeticOK(*semantics);
1696 assertArithmeticOK(toSemantics);
1697 lostFraction = lfExactlyZero;
1698 newPartCount = partCountForBits(toSemantics.precision + 1);
1699 oldPartCount = partCount();
1701 /* Handle storage complications. If our new form is wider,
1702 re-allocate our bit pattern into wider storage. If it is
1703 narrower, we ignore the excess parts, but if narrowing to a
1704 single part we need to free the old storage.
1705 Be careful not to reference significandParts for zeroes
1706 and infinities, since it aborts. */
1707 if (newPartCount > oldPartCount) {
1708 integerPart *newParts;
1709 newParts = new integerPart[newPartCount];
1710 APInt::tcSet(newParts, 0, newPartCount);
1711 if (category==fcNormal || category==fcNaN)
1712 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1714 significand.parts = newParts;
1715 } else if (newPartCount < oldPartCount) {
1716 /* Capture any lost fraction through truncation of parts so we get
1717 correct rounding whilst normalizing. */
1718 if (category==fcNormal)
1719 lostFraction = lostFractionThroughTruncation
1720 (significandParts(), oldPartCount, toSemantics.precision);
1721 if (newPartCount == 1) {
1722 integerPart newPart = 0;
1723 if (category==fcNormal || category==fcNaN)
1724 newPart = significandParts()[0];
1726 significand.part = newPart;
1730 if(category == fcNormal) {
1731 /* Re-interpret our bit-pattern. */
1732 exponent += toSemantics.precision - semantics->precision;
1733 semantics = &toSemantics;
1734 fs = normalize(rounding_mode, lostFraction);
1735 *losesInfo = (fs != opOK);
1736 } else if (category == fcNaN) {
1737 int shift = toSemantics.precision - semantics->precision;
1738 // Do this now so significandParts gets the right answer
1739 const fltSemantics *oldSemantics = semantics;
1740 semantics = &toSemantics;
1742 // No normalization here, just truncate
1744 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1745 else if (shift < 0) {
1746 unsigned ushift = -shift;
1747 // Figure out if we are losing information. This happens
1748 // if are shifting out something other than 0s, or if the x87 long
1749 // double input did not have its integer bit set (pseudo-NaN), or if the
1750 // x87 long double input did not have its QNan bit set (because the x87
1751 // hardware sets this bit when converting a lower-precision NaN to
1752 // x87 long double).
1753 if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
1755 if (oldSemantics == &APFloat::x87DoubleExtended &&
1756 (!(*significandParts() & 0x8000000000000000ULL) ||
1757 !(*significandParts() & 0x4000000000000000ULL)))
1759 APInt::tcShiftRight(significandParts(), newPartCount, ushift);
1761 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1762 // does not give you back the same bits. This is dubious, and we
1763 // don't currently do it. You're really supposed to get
1764 // an invalid operation signal at runtime, but nobody does that.
1767 semantics = &toSemantics;
1775 /* Convert a floating point number to an integer according to the
1776 rounding mode. If the rounded integer value is out of range this
1777 returns an invalid operation exception and the contents of the
1778 destination parts are unspecified. If the rounded value is in
1779 range but the floating point number is not the exact integer, the C
1780 standard doesn't require an inexact exception to be raised. IEEE
1781 854 does require it so we do that.
1783 Note that for conversions to integer type the C standard requires
1784 round-to-zero to always be used. */
1786 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1788 roundingMode rounding_mode,
1789 bool *isExact) const
1791 lostFraction lost_fraction;
1792 const integerPart *src;
1793 unsigned int dstPartsCount, truncatedBits;
1795 if (!isArithmeticOk(*semantics))
1800 /* Handle the three special cases first. */
1801 if(category == fcInfinity || category == fcNaN)
1804 dstPartsCount = partCountForBits(width);
1806 if(category == fcZero) {
1807 APInt::tcSet(parts, 0, dstPartsCount);
1808 // Negative zero can't be represented as an int.
1813 src = significandParts();
1815 /* Step 1: place our absolute value, with any fraction truncated, in
1818 /* Our absolute value is less than one; truncate everything. */
1819 APInt::tcSet(parts, 0, dstPartsCount);
1820 truncatedBits = semantics->precision;
1822 /* We want the most significant (exponent + 1) bits; the rest are
1824 unsigned int bits = exponent + 1U;
1826 /* Hopelessly large in magnitude? */
1830 if (bits < semantics->precision) {
1831 /* We truncate (semantics->precision - bits) bits. */
1832 truncatedBits = semantics->precision - bits;
1833 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
1835 /* We want at least as many bits as are available. */
1836 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
1837 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
1842 /* Step 2: work out any lost fraction, and increment the absolute
1843 value if we would round away from zero. */
1844 if (truncatedBits) {
1845 lost_fraction = lostFractionThroughTruncation(src, partCount(),
1847 if (lost_fraction != lfExactlyZero
1848 && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
1849 if (APInt::tcIncrement(parts, dstPartsCount))
1850 return opInvalidOp; /* Overflow. */
1853 lost_fraction = lfExactlyZero;
1856 /* Step 3: check if we fit in the destination. */
1857 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
1861 /* Negative numbers cannot be represented as unsigned. */
1865 /* It takes omsb bits to represent the unsigned integer value.
1866 We lose a bit for the sign, but care is needed as the
1867 maximally negative integer is a special case. */
1868 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
1871 /* This case can happen because of rounding. */
1876 APInt::tcNegate (parts, dstPartsCount);
1878 if (omsb >= width + !isSigned)
1882 if (lost_fraction == lfExactlyZero) {
1889 /* Same as convertToSignExtendedInteger, except we provide
1890 deterministic values in case of an invalid operation exception,
1891 namely zero for NaNs and the minimal or maximal value respectively
1892 for underflow or overflow.
1893 The *isExact output tells whether the result is exact, in the sense
1894 that converting it back to the original floating point type produces
1895 the original value. This is almost equivalent to result==opOK,
1896 except for negative zeroes.
1899 APFloat::convertToInteger(integerPart *parts, unsigned int width,
1901 roundingMode rounding_mode, bool *isExact) const
1905 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
1908 if (fs == opInvalidOp) {
1909 unsigned int bits, dstPartsCount;
1911 dstPartsCount = partCountForBits(width);
1913 if (category == fcNaN)
1918 bits = width - isSigned;
1920 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
1921 if (sign && isSigned)
1922 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
1928 /* Convert an unsigned integer SRC to a floating point number,
1929 rounding according to ROUNDING_MODE. The sign of the floating
1930 point number is not modified. */
1932 APFloat::convertFromUnsignedParts(const integerPart *src,
1933 unsigned int srcCount,
1934 roundingMode rounding_mode)
1936 unsigned int omsb, precision, dstCount;
1938 lostFraction lost_fraction;
1940 assertArithmeticOK(*semantics);
1941 category = fcNormal;
1942 omsb = APInt::tcMSB(src, srcCount) + 1;
1943 dst = significandParts();
1944 dstCount = partCount();
1945 precision = semantics->precision;
1947 /* We want the most significant PRECISON bits of SRC. There may not
1948 be that many; extract what we can. */
1949 if (precision <= omsb) {
1950 exponent = omsb - 1;
1951 lost_fraction = lostFractionThroughTruncation(src, srcCount,
1953 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
1955 exponent = precision - 1;
1956 lost_fraction = lfExactlyZero;
1957 APInt::tcExtract(dst, dstCount, src, omsb, 0);
1960 return normalize(rounding_mode, lost_fraction);
1964 APFloat::convertFromAPInt(const APInt &Val,
1966 roundingMode rounding_mode)
1968 unsigned int partCount = Val.getNumWords();
1972 if (isSigned && api.isNegative()) {
1977 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
1980 /* Convert a two's complement integer SRC to a floating point number,
1981 rounding according to ROUNDING_MODE. ISSIGNED is true if the
1982 integer is signed, in which case it must be sign-extended. */
1984 APFloat::convertFromSignExtendedInteger(const integerPart *src,
1985 unsigned int srcCount,
1987 roundingMode rounding_mode)
1991 assertArithmeticOK(*semantics);
1993 && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
1996 /* If we're signed and negative negate a copy. */
1998 copy = new integerPart[srcCount];
1999 APInt::tcAssign(copy, src, srcCount);
2000 APInt::tcNegate(copy, srcCount);
2001 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2005 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2011 /* FIXME: should this just take a const APInt reference? */
2013 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2014 unsigned int width, bool isSigned,
2015 roundingMode rounding_mode)
2017 unsigned int partCount = partCountForBits(width);
2018 APInt api = APInt(width, partCount, parts);
2021 if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
2026 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2030 APFloat::convertFromHexadecimalString(const char *p,
2031 roundingMode rounding_mode)
2033 lostFraction lost_fraction;
2034 integerPart *significand;
2035 unsigned int bitPos, partsCount;
2036 const char *dot, *firstSignificantDigit;
2040 category = fcNormal;
2042 significand = significandParts();
2043 partsCount = partCount();
2044 bitPos = partsCount * integerPartWidth;
2046 /* Skip leading zeroes and any (hexa)decimal point. */
2047 p = skipLeadingZeroesAndAnyDot(p, &dot);
2048 firstSignificantDigit = p;
2051 integerPart hex_value;
2058 hex_value = hexDigitValue(*p);
2059 if(hex_value == -1U) {
2060 lost_fraction = lfExactlyZero;
2066 /* Store the number whilst 4-bit nibbles remain. */
2069 hex_value <<= bitPos % integerPartWidth;
2070 significand[bitPos / integerPartWidth] |= hex_value;
2072 lost_fraction = trailingHexadecimalFraction(p, hex_value);
2073 while(hexDigitValue(*p) != -1U)
2079 /* Hex floats require an exponent but not a hexadecimal point. */
2080 assert(*p == 'p' || *p == 'P');
2082 /* Ignore the exponent if we are zero. */
2083 if(p != firstSignificantDigit) {
2086 /* Implicit hexadecimal point? */
2090 /* Calculate the exponent adjustment implicit in the number of
2091 significant digits. */
2092 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2093 if(expAdjustment < 0)
2095 expAdjustment = expAdjustment * 4 - 1;
2097 /* Adjust for writing the significand starting at the most
2098 significant nibble. */
2099 expAdjustment += semantics->precision;
2100 expAdjustment -= partsCount * integerPartWidth;
2102 /* Adjust for the given exponent. */
2103 exponent = totalExponent(p, expAdjustment);
2106 return normalize(rounding_mode, lost_fraction);
2110 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2111 unsigned sigPartCount, int exp,
2112 roundingMode rounding_mode)
2114 unsigned int parts, pow5PartCount;
2115 fltSemantics calcSemantics = { 32767, -32767, 0, true };
2116 integerPart pow5Parts[maxPowerOfFiveParts];
2119 isNearest = (rounding_mode == rmNearestTiesToEven
2120 || rounding_mode == rmNearestTiesToAway);
2122 parts = partCountForBits(semantics->precision + 11);
2124 /* Calculate pow(5, abs(exp)). */
2125 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2127 for (;; parts *= 2) {
2128 opStatus sigStatus, powStatus;
2129 unsigned int excessPrecision, truncatedBits;
2131 calcSemantics.precision = parts * integerPartWidth - 1;
2132 excessPrecision = calcSemantics.precision - semantics->precision;
2133 truncatedBits = excessPrecision;
2135 APFloat decSig(calcSemantics, fcZero, sign);
2136 APFloat pow5(calcSemantics, fcZero, false);
2138 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2139 rmNearestTiesToEven);
2140 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2141 rmNearestTiesToEven);
2142 /* Add exp, as 10^n = 5^n * 2^n. */
2143 decSig.exponent += exp;
2145 lostFraction calcLostFraction;
2146 integerPart HUerr, HUdistance;
2147 unsigned int powHUerr;
2150 /* multiplySignificand leaves the precision-th bit set to 1. */
2151 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2152 powHUerr = powStatus != opOK;
2154 calcLostFraction = decSig.divideSignificand(pow5);
2155 /* Denormal numbers have less precision. */
2156 if (decSig.exponent < semantics->minExponent) {
2157 excessPrecision += (semantics->minExponent - decSig.exponent);
2158 truncatedBits = excessPrecision;
2159 if (excessPrecision > calcSemantics.precision)
2160 excessPrecision = calcSemantics.precision;
2162 /* Extra half-ulp lost in reciprocal of exponent. */
2163 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2166 /* Both multiplySignificand and divideSignificand return the
2167 result with the integer bit set. */
2168 assert (APInt::tcExtractBit
2169 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2171 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2173 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2174 excessPrecision, isNearest);
2176 /* Are we guaranteed to round correctly if we truncate? */
2177 if (HUdistance >= HUerr) {
2178 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2179 calcSemantics.precision - excessPrecision,
2181 /* Take the exponent of decSig. If we tcExtract-ed less bits
2182 above we must adjust our exponent to compensate for the
2183 implicit right shift. */
2184 exponent = (decSig.exponent + semantics->precision
2185 - (calcSemantics.precision - excessPrecision));
2186 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2189 return normalize(rounding_mode, calcLostFraction);
2195 APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode)
2200 /* Scan the text. */
2201 interpretDecimal(p, &D);
2203 /* Handle the quick cases. First the case of no significant digits,
2204 i.e. zero, and then exponents that are obviously too large or too
2205 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2206 definitely overflows if
2208 (exp - 1) * L >= maxExponent
2210 and definitely underflows to zero where
2212 (exp + 1) * L <= minExponent - precision
2214 With integer arithmetic the tightest bounds for L are
2216 93/28 < L < 196/59 [ numerator <= 256 ]
2217 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2220 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2223 } else if ((D.normalizedExponent + 1) * 28738
2224 <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
2225 /* Underflow to zero and round. */
2227 fs = normalize(rounding_mode, lfLessThanHalf);
2228 } else if ((D.normalizedExponent - 1) * 42039
2229 >= 12655 * semantics->maxExponent) {
2230 /* Overflow and round. */
2231 fs = handleOverflow(rounding_mode);
2233 integerPart *decSignificand;
2234 unsigned int partCount;
2236 /* A tight upper bound on number of bits required to hold an
2237 N-digit decimal integer is N * 196 / 59. Allocate enough space
2238 to hold the full significand, and an extra part required by
2240 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2241 partCount = partCountForBits(1 + 196 * partCount / 59);
2242 decSignificand = new integerPart[partCount + 1];
2245 /* Convert to binary efficiently - we do almost all multiplication
2246 in an integerPart. When this would overflow do we do a single
2247 bignum multiplication, and then revert again to multiplication
2248 in an integerPart. */
2250 integerPart decValue, val, multiplier;
2259 decValue = decDigitValue(*p++);
2261 val = val * 10 + decValue;
2262 /* The maximum number that can be multiplied by ten with any
2263 digit added without overflowing an integerPart. */
2264 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2266 /* Multiply out the current part. */
2267 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2268 partCount, partCount + 1, false);
2270 /* If we used another part (likely but not guaranteed), increase
2272 if (decSignificand[partCount])
2274 } while (p <= D.lastSigDigit);
2276 category = fcNormal;
2277 fs = roundSignificandWithExponent(decSignificand, partCount,
2278 D.exponent, rounding_mode);
2280 delete [] decSignificand;
2287 APFloat::convertFromString(const char *p, roundingMode rounding_mode)
2289 assertArithmeticOK(*semantics);
2291 /* Handle a leading minus sign. */
2297 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X'))
2298 return convertFromHexadecimalString(p + 2, rounding_mode);
2300 return convertFromDecimalString(p, rounding_mode);
2303 /* Write out a hexadecimal representation of the floating point value
2304 to DST, which must be of sufficient size, in the C99 form
2305 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2306 excluding the terminating NUL.
2308 If UPPERCASE, the output is in upper case, otherwise in lower case.
2310 HEXDIGITS digits appear altogether, rounding the value if
2311 necessary. If HEXDIGITS is 0, the minimal precision to display the
2312 number precisely is used instead. If nothing would appear after
2313 the decimal point it is suppressed.
2315 The decimal exponent is always printed and has at least one digit.
2316 Zero values display an exponent of zero. Infinities and NaNs
2317 appear as "infinity" or "nan" respectively.
2319 The above rules are as specified by C99. There is ambiguity about
2320 what the leading hexadecimal digit should be. This implementation
2321 uses whatever is necessary so that the exponent is displayed as
2322 stored. This implies the exponent will fall within the IEEE format
2323 range, and the leading hexadecimal digit will be 0 (for denormals),
2324 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2325 any other digits zero).
2328 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2329 bool upperCase, roundingMode rounding_mode) const
2333 assertArithmeticOK(*semantics);
2341 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2342 dst += sizeof infinityL - 1;
2346 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2347 dst += sizeof NaNU - 1;
2352 *dst++ = upperCase ? 'X': 'x';
2354 if (hexDigits > 1) {
2356 memset (dst, '0', hexDigits - 1);
2357 dst += hexDigits - 1;
2359 *dst++ = upperCase ? 'P': 'p';
2364 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2370 return static_cast<unsigned int>(dst - p);
2373 /* Does the hard work of outputting the correctly rounded hexadecimal
2374 form of a normal floating point number with the specified number of
2375 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2376 digits necessary to print the value precisely is output. */
2378 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2380 roundingMode rounding_mode) const
2382 unsigned int count, valueBits, shift, partsCount, outputDigits;
2383 const char *hexDigitChars;
2384 const integerPart *significand;
2389 *dst++ = upperCase ? 'X': 'x';
2392 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2394 significand = significandParts();
2395 partsCount = partCount();
2397 /* +3 because the first digit only uses the single integer bit, so
2398 we have 3 virtual zero most-significant-bits. */
2399 valueBits = semantics->precision + 3;
2400 shift = integerPartWidth - valueBits % integerPartWidth;
2402 /* The natural number of digits required ignoring trailing
2403 insignificant zeroes. */
2404 outputDigits = (valueBits - significandLSB () + 3) / 4;
2406 /* hexDigits of zero means use the required number for the
2407 precision. Otherwise, see if we are truncating. If we are,
2408 find out if we need to round away from zero. */
2410 if (hexDigits < outputDigits) {
2411 /* We are dropping non-zero bits, so need to check how to round.
2412 "bits" is the number of dropped bits. */
2414 lostFraction fraction;
2416 bits = valueBits - hexDigits * 4;
2417 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2418 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2420 outputDigits = hexDigits;
2423 /* Write the digits consecutively, and start writing in the location
2424 of the hexadecimal point. We move the most significant digit
2425 left and add the hexadecimal point later. */
2428 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2430 while (outputDigits && count) {
2433 /* Put the most significant integerPartWidth bits in "part". */
2434 if (--count == partsCount)
2435 part = 0; /* An imaginary higher zero part. */
2437 part = significand[count] << shift;
2440 part |= significand[count - 1] >> (integerPartWidth - shift);
2442 /* Convert as much of "part" to hexdigits as we can. */
2443 unsigned int curDigits = integerPartWidth / 4;
2445 if (curDigits > outputDigits)
2446 curDigits = outputDigits;
2447 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2448 outputDigits -= curDigits;
2454 /* Note that hexDigitChars has a trailing '0'. */
2457 *q = hexDigitChars[hexDigitValue (*q) + 1];
2458 } while (*q == '0');
2461 /* Add trailing zeroes. */
2462 memset (dst, '0', outputDigits);
2463 dst += outputDigits;
2466 /* Move the most significant digit to before the point, and if there
2467 is something after the decimal point add it. This must come
2468 after rounding above. */
2475 /* Finally output the exponent. */
2476 *dst++ = upperCase ? 'P': 'p';
2478 return writeSignedDecimal (dst, exponent);
2481 // For good performance it is desirable for different APFloats
2482 // to produce different integers.
2484 APFloat::getHashValue() const
2486 if (category==fcZero) return sign<<8 | semantics->precision ;
2487 else if (category==fcInfinity) return sign<<9 | semantics->precision;
2488 else if (category==fcNaN) return 1<<10 | semantics->precision;
2490 uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
2491 const integerPart* p = significandParts();
2492 for (int i=partCount(); i>0; i--, p++)
2493 hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
2498 // Conversion from APFloat to/from host float/double. It may eventually be
2499 // possible to eliminate these and have everybody deal with APFloats, but that
2500 // will take a while. This approach will not easily extend to long double.
2501 // Current implementation requires integerPartWidth==64, which is correct at
2502 // the moment but could be made more general.
2504 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2505 // the actual IEEE respresentations. We compensate for that here.
2508 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2510 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2511 assert (partCount()==2);
2513 uint64_t myexponent, mysignificand;
2515 if (category==fcNormal) {
2516 myexponent = exponent+16383; //bias
2517 mysignificand = significandParts()[0];
2518 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2519 myexponent = 0; // denormal
2520 } else if (category==fcZero) {
2523 } else if (category==fcInfinity) {
2524 myexponent = 0x7fff;
2525 mysignificand = 0x8000000000000000ULL;
2527 assert(category == fcNaN && "Unknown category");
2528 myexponent = 0x7fff;
2529 mysignificand = significandParts()[0];
2533 words[0] = ((uint64_t)(sign & 1) << 63) |
2534 ((myexponent & 0x7fffLL) << 48) |
2535 ((mysignificand >>16) & 0xffffffffffffLL);
2536 words[1] = mysignificand & 0xffff;
2537 return APInt(80, 2, words);
2541 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2543 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2544 assert (partCount()==2);
2546 uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
2548 if (category==fcNormal) {
2549 myexponent = exponent + 1023; //bias
2550 myexponent2 = exponent2 + 1023;
2551 mysignificand = significandParts()[0];
2552 mysignificand2 = significandParts()[1];
2553 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2554 myexponent = 0; // denormal
2555 if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
2556 myexponent2 = 0; // denormal
2557 } else if (category==fcZero) {
2562 } else if (category==fcInfinity) {
2568 assert(category == fcNaN && "Unknown category");
2570 mysignificand = significandParts()[0];
2571 myexponent2 = exponent2;
2572 mysignificand2 = significandParts()[1];
2576 words[0] = ((uint64_t)(sign & 1) << 63) |
2577 ((myexponent & 0x7ff) << 52) |
2578 (mysignificand & 0xfffffffffffffLL);
2579 words[1] = ((uint64_t)(sign2 & 1) << 63) |
2580 ((myexponent2 & 0x7ff) << 52) |
2581 (mysignificand2 & 0xfffffffffffffLL);
2582 return APInt(128, 2, words);
2586 APFloat::convertDoubleAPFloatToAPInt() const
2588 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2589 assert (partCount()==1);
2591 uint64_t myexponent, mysignificand;
2593 if (category==fcNormal) {
2594 myexponent = exponent+1023; //bias
2595 mysignificand = *significandParts();
2596 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2597 myexponent = 0; // denormal
2598 } else if (category==fcZero) {
2601 } else if (category==fcInfinity) {
2605 assert(category == fcNaN && "Unknown category!");
2607 mysignificand = *significandParts();
2610 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2611 ((myexponent & 0x7ff) << 52) |
2612 (mysignificand & 0xfffffffffffffLL))));
2616 APFloat::convertFloatAPFloatToAPInt() const
2618 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2619 assert (partCount()==1);
2621 uint32_t myexponent, mysignificand;
2623 if (category==fcNormal) {
2624 myexponent = exponent+127; //bias
2625 mysignificand = (uint32_t)*significandParts();
2626 if (myexponent == 1 && !(mysignificand & 0x800000))
2627 myexponent = 0; // denormal
2628 } else if (category==fcZero) {
2631 } else if (category==fcInfinity) {
2635 assert(category == fcNaN && "Unknown category!");
2637 mysignificand = (uint32_t)*significandParts();
2640 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2641 (mysignificand & 0x7fffff)));
2644 // This function creates an APInt that is just a bit map of the floating
2645 // point constant as it would appear in memory. It is not a conversion,
2646 // and treating the result as a normal integer is unlikely to be useful.
2649 APFloat::bitcastToAPInt() const
2651 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2652 return convertFloatAPFloatToAPInt();
2654 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2655 return convertDoubleAPFloatToAPInt();
2657 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2658 return convertPPCDoubleDoubleAPFloatToAPInt();
2660 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2662 return convertF80LongDoubleAPFloatToAPInt();
2666 APFloat::convertToFloat() const
2668 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2669 APInt api = bitcastToAPInt();
2670 return api.bitsToFloat();
2674 APFloat::convertToDouble() const
2676 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2677 APInt api = bitcastToAPInt();
2678 return api.bitsToDouble();
2681 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2682 /// does not support these bit patterns:
2683 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2684 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2685 /// exponent = 0, integer bit 1 ("pseudodenormal")
2686 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2687 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2689 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2691 assert(api.getBitWidth()==80);
2692 uint64_t i1 = api.getRawData()[0];
2693 uint64_t i2 = api.getRawData()[1];
2694 uint64_t myexponent = (i1 >> 48) & 0x7fff;
2695 uint64_t mysignificand = ((i1 << 16) & 0xffffffffffff0000ULL) |
2698 initialize(&APFloat::x87DoubleExtended);
2699 assert(partCount()==2);
2701 sign = static_cast<unsigned int>(i1>>63);
2702 if (myexponent==0 && mysignificand==0) {
2703 // exponent, significand meaningless
2705 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2706 // exponent, significand meaningless
2707 category = fcInfinity;
2708 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2709 // exponent meaningless
2711 significandParts()[0] = mysignificand;
2712 significandParts()[1] = 0;
2714 category = fcNormal;
2715 exponent = myexponent - 16383;
2716 significandParts()[0] = mysignificand;
2717 significandParts()[1] = 0;
2718 if (myexponent==0) // denormal
2724 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
2726 assert(api.getBitWidth()==128);
2727 uint64_t i1 = api.getRawData()[0];
2728 uint64_t i2 = api.getRawData()[1];
2729 uint64_t myexponent = (i1 >> 52) & 0x7ff;
2730 uint64_t mysignificand = i1 & 0xfffffffffffffLL;
2731 uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
2732 uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
2734 initialize(&APFloat::PPCDoubleDouble);
2735 assert(partCount()==2);
2737 sign = static_cast<unsigned int>(i1>>63);
2738 sign2 = static_cast<unsigned int>(i2>>63);
2739 if (myexponent==0 && mysignificand==0) {
2740 // exponent, significand meaningless
2741 // exponent2 and significand2 are required to be 0; we don't check
2743 } else if (myexponent==0x7ff && mysignificand==0) {
2744 // exponent, significand meaningless
2745 // exponent2 and significand2 are required to be 0; we don't check
2746 category = fcInfinity;
2747 } else if (myexponent==0x7ff && mysignificand!=0) {
2748 // exponent meaningless. So is the whole second word, but keep it
2751 exponent2 = myexponent2;
2752 significandParts()[0] = mysignificand;
2753 significandParts()[1] = mysignificand2;
2755 category = fcNormal;
2756 // Note there is no category2; the second word is treated as if it is
2757 // fcNormal, although it might be something else considered by itself.
2758 exponent = myexponent - 1023;
2759 exponent2 = myexponent2 - 1023;
2760 significandParts()[0] = mysignificand;
2761 significandParts()[1] = mysignificand2;
2762 if (myexponent==0) // denormal
2765 significandParts()[0] |= 0x10000000000000LL; // integer bit
2769 significandParts()[1] |= 0x10000000000000LL; // integer bit
2774 APFloat::initFromDoubleAPInt(const APInt &api)
2776 assert(api.getBitWidth()==64);
2777 uint64_t i = *api.getRawData();
2778 uint64_t myexponent = (i >> 52) & 0x7ff;
2779 uint64_t mysignificand = i & 0xfffffffffffffLL;
2781 initialize(&APFloat::IEEEdouble);
2782 assert(partCount()==1);
2784 sign = static_cast<unsigned int>(i>>63);
2785 if (myexponent==0 && mysignificand==0) {
2786 // exponent, significand meaningless
2788 } else if (myexponent==0x7ff && mysignificand==0) {
2789 // exponent, significand meaningless
2790 category = fcInfinity;
2791 } else if (myexponent==0x7ff && mysignificand!=0) {
2792 // exponent meaningless
2794 *significandParts() = mysignificand;
2796 category = fcNormal;
2797 exponent = myexponent - 1023;
2798 *significandParts() = mysignificand;
2799 if (myexponent==0) // denormal
2802 *significandParts() |= 0x10000000000000LL; // integer bit
2807 APFloat::initFromFloatAPInt(const APInt & api)
2809 assert(api.getBitWidth()==32);
2810 uint32_t i = (uint32_t)*api.getRawData();
2811 uint32_t myexponent = (i >> 23) & 0xff;
2812 uint32_t mysignificand = i & 0x7fffff;
2814 initialize(&APFloat::IEEEsingle);
2815 assert(partCount()==1);
2818 if (myexponent==0 && mysignificand==0) {
2819 // exponent, significand meaningless
2821 } else if (myexponent==0xff && mysignificand==0) {
2822 // exponent, significand meaningless
2823 category = fcInfinity;
2824 } else if (myexponent==0xff && mysignificand!=0) {
2825 // sign, exponent, significand meaningless
2827 *significandParts() = mysignificand;
2829 category = fcNormal;
2830 exponent = myexponent - 127; //bias
2831 *significandParts() = mysignificand;
2832 if (myexponent==0) // denormal
2835 *significandParts() |= 0x800000; // integer bit
2839 /// Treat api as containing the bits of a floating point number. Currently
2840 /// we infer the floating point type from the size of the APInt. The
2841 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
2842 /// when the size is anything else).
2844 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
2846 if (api.getBitWidth() == 32)
2847 return initFromFloatAPInt(api);
2848 else if (api.getBitWidth()==64)
2849 return initFromDoubleAPInt(api);
2850 else if (api.getBitWidth()==80)
2851 return initFromF80LongDoubleAPInt(api);
2852 else if (api.getBitWidth()==128 && !isIEEE)
2853 return initFromPPCDoubleDoubleAPInt(api);
2858 APFloat::APFloat(const APInt& api, bool isIEEE)
2860 initFromAPInt(api, isIEEE);
2863 APFloat::APFloat(float f)
2865 APInt api = APInt(32, 0);
2866 initFromAPInt(api.floatToBits(f));
2869 APFloat::APFloat(double d)
2871 APInt api = APInt(64, 0);
2872 initFromAPInt(api.doubleToBits(d));