1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MathExtras.h"
27 #define convolve(lhs, rhs) ((lhs) * 4 + (rhs))
29 /* Assumed in hexadecimal significand parsing, and conversion to
30 hexadecimal strings. */
31 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
32 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
36 /* Represents floating point arithmetic semantics. */
38 /* The largest E such that 2^E is representable; this matches the
39 definition of IEEE 754. */
40 exponent_t maxExponent;
42 /* The smallest E such that 2^E is a normalized number; this
43 matches the definition of IEEE 754. */
44 exponent_t minExponent;
46 /* Number of bits in the significand. This includes the integer
48 unsigned int precision;
51 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
52 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
53 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
54 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
55 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
56 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
58 /* The PowerPC format consists of two doubles. It does not map cleanly
59 onto the usual format above. It is approximated using twice the
60 mantissa bits. Note that for exponents near the double minimum,
61 we no longer can represent the full 106 mantissa bits, so those
62 will be treated as denormal numbers.
64 FIXME: While this approximation is equivalent to what GCC uses for
65 compile-time arithmetic on PPC double-double numbers, it is not able
66 to represent all possible values held by a PPC double-double number,
67 for example: (long double) 1.0 + (long double) 0x1p-106
68 Should this be replaced by a full emulation of PPC double-double? */
69 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
71 /* A tight upper bound on number of parts required to hold the value
74 power * 815 / (351 * integerPartWidth) + 1
76 However, whilst the result may require only this many parts,
77 because we are multiplying two values to get it, the
78 multiplication may require an extra part with the excess part
79 being zero (consider the trivial case of 1 * 1, tcFullMultiply
80 requires two parts to hold the single-part result). So we add an
81 extra one to guarantee enough space whilst multiplying. */
82 const unsigned int maxExponent = 16383;
83 const unsigned int maxPrecision = 113;
84 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
85 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
86 / (351 * integerPartWidth));
89 /* A bunch of private, handy routines. */
91 static inline unsigned int
92 partCountForBits(unsigned int bits)
94 return ((bits) + integerPartWidth - 1) / integerPartWidth;
97 /* Returns 0U-9U. Return values >= 10U are not digits. */
98 static inline unsigned int
99 decDigitValue(unsigned int c)
105 hexDigitValue(unsigned int c)
124 /* Return the value of a decimal exponent of the form
127 If the exponent overflows, returns a large exponent with the
130 readExponent(StringRef::iterator begin, StringRef::iterator end)
133 unsigned int absExponent;
134 const unsigned int overlargeExponent = 24000; /* FIXME. */
135 StringRef::iterator p = begin;
137 assert(p != end && "Exponent has no digits");
139 isNegative = (*p == '-');
140 if (*p == '-' || *p == '+') {
142 assert(p != end && "Exponent has no digits");
145 absExponent = decDigitValue(*p++);
146 assert(absExponent < 10U && "Invalid character in exponent");
148 for (; p != end; ++p) {
151 value = decDigitValue(*p);
152 assert(value < 10U && "Invalid character in exponent");
154 value += absExponent * 10;
155 if (absExponent >= overlargeExponent) {
156 absExponent = overlargeExponent;
157 p = end; /* outwit assert below */
163 assert(p == end && "Invalid exponent in exponent");
166 return -(int) absExponent;
168 return (int) absExponent;
171 /* This is ugly and needs cleaning up, but I don't immediately see
172 how whilst remaining safe. */
174 totalExponent(StringRef::iterator p, StringRef::iterator end,
175 int exponentAdjustment)
177 int unsignedExponent;
178 bool negative, overflow;
181 assert(p != end && "Exponent has no digits");
183 negative = *p == '-';
184 if (*p == '-' || *p == '+') {
186 assert(p != end && "Exponent has no digits");
189 unsignedExponent = 0;
191 for (; p != end; ++p) {
194 value = decDigitValue(*p);
195 assert(value < 10U && "Invalid character in exponent");
197 unsignedExponent = unsignedExponent * 10 + value;
198 if (unsignedExponent > 32767) {
204 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
208 exponent = unsignedExponent;
210 exponent = -exponent;
211 exponent += exponentAdjustment;
212 if (exponent > 32767 || exponent < -32768)
217 exponent = negative ? -32768: 32767;
222 static StringRef::iterator
223 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
224 StringRef::iterator *dot)
226 StringRef::iterator p = begin;
228 while (*p == '0' && p != end)
234 assert(end - begin != 1 && "Significand has no digits");
236 while (*p == '0' && p != end)
243 /* Given a normal decimal floating point number of the form
247 where the decimal point and exponent are optional, fill out the
248 structure D. Exponent is appropriate if the significand is
249 treated as an integer, and normalizedExponent if the significand
250 is taken to have the decimal point after a single leading
253 If the value is zero, V->firstSigDigit points to a non-digit, and
254 the return exponent is zero.
257 const char *firstSigDigit;
258 const char *lastSigDigit;
260 int normalizedExponent;
264 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
267 StringRef::iterator dot = end;
268 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
270 D->firstSigDigit = p;
272 D->normalizedExponent = 0;
274 for (; p != end; ++p) {
276 assert(dot == end && "String contains multiple dots");
281 if (decDigitValue(*p) >= 10U)
286 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
287 assert(p != begin && "Significand has no digits");
288 assert((dot == end || p - begin != 1) && "Significand has no digits");
290 /* p points to the first non-digit in the string */
291 D->exponent = readExponent(p + 1, end);
293 /* Implied decimal point? */
298 /* If number is all zeroes accept any exponent. */
299 if (p != D->firstSigDigit) {
300 /* Drop insignificant trailing zeroes. */
305 while (p != begin && *p == '0');
306 while (p != begin && *p == '.');
309 /* Adjust the exponents for any decimal point. */
310 D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
311 D->normalizedExponent = (D->exponent +
312 static_cast<exponent_t>((p - D->firstSigDigit)
313 - (dot > D->firstSigDigit && dot < p)));
319 /* Return the trailing fraction of a hexadecimal number.
320 DIGITVALUE is the first hex digit of the fraction, P points to
323 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
324 unsigned int digitValue)
326 unsigned int hexDigit;
328 /* If the first trailing digit isn't 0 or 8 we can work out the
329 fraction immediately. */
331 return lfMoreThanHalf;
332 else if (digitValue < 8 && digitValue > 0)
333 return lfLessThanHalf;
335 /* Otherwise we need to find the first non-zero digit. */
339 assert(p != end && "Invalid trailing hexadecimal fraction!");
341 hexDigit = hexDigitValue(*p);
343 /* If we ran off the end it is exactly zero or one-half, otherwise
346 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
348 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
351 /* Return the fraction lost were a bignum truncated losing the least
352 significant BITS bits. */
354 lostFractionThroughTruncation(const integerPart *parts,
355 unsigned int partCount,
360 lsb = APInt::tcLSB(parts, partCount);
362 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
364 return lfExactlyZero;
366 return lfExactlyHalf;
367 if (bits <= partCount * integerPartWidth &&
368 APInt::tcExtractBit(parts, bits - 1))
369 return lfMoreThanHalf;
371 return lfLessThanHalf;
374 /* Shift DST right BITS bits noting lost fraction. */
376 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
378 lostFraction lost_fraction;
380 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
382 APInt::tcShiftRight(dst, parts, bits);
384 return lost_fraction;
387 /* Combine the effect of two lost fractions. */
389 combineLostFractions(lostFraction moreSignificant,
390 lostFraction lessSignificant)
392 if (lessSignificant != lfExactlyZero) {
393 if (moreSignificant == lfExactlyZero)
394 moreSignificant = lfLessThanHalf;
395 else if (moreSignificant == lfExactlyHalf)
396 moreSignificant = lfMoreThanHalf;
399 return moreSignificant;
402 /* The error from the true value, in half-ulps, on multiplying two
403 floating point numbers, which differ from the value they
404 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
405 than the returned value.
407 See "How to Read Floating Point Numbers Accurately" by William D
410 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
412 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
414 if (HUerr1 + HUerr2 == 0)
415 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
417 return inexactMultiply + 2 * (HUerr1 + HUerr2);
420 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
421 when the least significant BITS are truncated. BITS cannot be
424 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
426 unsigned int count, partBits;
427 integerPart part, boundary;
432 count = bits / integerPartWidth;
433 partBits = bits % integerPartWidth + 1;
435 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
438 boundary = (integerPart) 1 << (partBits - 1);
443 if (part - boundary <= boundary - part)
444 return part - boundary;
446 return boundary - part;
449 if (part == boundary) {
452 return ~(integerPart) 0; /* A lot. */
455 } else if (part == boundary - 1) {
458 return ~(integerPart) 0; /* A lot. */
463 return ~(integerPart) 0; /* A lot. */
466 /* Place pow(5, power) in DST, and return the number of parts used.
467 DST must be at least one part larger than size of the answer. */
469 powerOf5(integerPart *dst, unsigned int power)
471 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
473 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
474 pow5s[0] = 78125 * 5;
476 unsigned int partsCount[16] = { 1 };
477 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
479 assert(power <= maxExponent);
484 *p1 = firstEightPowers[power & 7];
490 for (unsigned int n = 0; power; power >>= 1, n++) {
495 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
497 pc = partsCount[n - 1];
498 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
500 if (pow5[pc - 1] == 0)
508 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
510 if (p2[result - 1] == 0)
513 /* Now result is in p1 with partsCount parts and p2 is scratch
515 tmp = p1, p1 = p2, p2 = tmp;
522 APInt::tcAssign(dst, p1, result);
527 /* Zero at the end to avoid modular arithmetic when adding one; used
528 when rounding up during hexadecimal output. */
529 static const char hexDigitsLower[] = "0123456789abcdef0";
530 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
531 static const char infinityL[] = "infinity";
532 static const char infinityU[] = "INFINITY";
533 static const char NaNL[] = "nan";
534 static const char NaNU[] = "NAN";
536 /* Write out an integerPart in hexadecimal, starting with the most
537 significant nibble. Write out exactly COUNT hexdigits, return
540 partAsHex (char *dst, integerPart part, unsigned int count,
541 const char *hexDigitChars)
543 unsigned int result = count;
545 assert(count != 0 && count <= integerPartWidth / 4);
547 part >>= (integerPartWidth - 4 * count);
549 dst[count] = hexDigitChars[part & 0xf];
556 /* Write out an unsigned decimal integer. */
558 writeUnsignedDecimal (char *dst, unsigned int n)
574 /* Write out a signed decimal integer. */
576 writeSignedDecimal (char *dst, int value)
580 dst = writeUnsignedDecimal(dst, -(unsigned) value);
582 dst = writeUnsignedDecimal(dst, value);
589 APFloat::initialize(const fltSemantics *ourSemantics)
593 semantics = ourSemantics;
596 significand.parts = new integerPart[count];
600 APFloat::freeSignificand()
603 delete [] significand.parts;
607 APFloat::assign(const APFloat &rhs)
609 assert(semantics == rhs.semantics);
612 category = rhs.category;
613 exponent = rhs.exponent;
614 if (category == fcNormal || category == fcNaN)
615 copySignificand(rhs);
619 APFloat::copySignificand(const APFloat &rhs)
621 assert(category == fcNormal || category == fcNaN);
622 assert(rhs.partCount() >= partCount());
624 APInt::tcAssign(significandParts(), rhs.significandParts(),
628 /* Make this number a NaN, with an arbitrary but deterministic value
629 for the significand. If double or longer, this is a signalling NaN,
630 which may not be ideal. If float, this is QNaN(0). */
631 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
636 integerPart *significand = significandParts();
637 unsigned numParts = partCount();
639 // Set the significand bits to the fill.
640 if (!fill || fill->getNumWords() < numParts)
641 APInt::tcSet(significand, 0, numParts);
643 APInt::tcAssign(significand, fill->getRawData(),
644 std::min(fill->getNumWords(), numParts));
646 // Zero out the excess bits of the significand.
647 unsigned bitsToPreserve = semantics->precision - 1;
648 unsigned part = bitsToPreserve / 64;
649 bitsToPreserve %= 64;
650 significand[part] &= ((1ULL << bitsToPreserve) - 1);
651 for (part++; part != numParts; ++part)
652 significand[part] = 0;
655 unsigned QNaNBit = semantics->precision - 2;
658 // We always have to clear the QNaN bit to make it an SNaN.
659 APInt::tcClearBit(significand, QNaNBit);
661 // If there are no bits set in the payload, we have to set
662 // *something* to make it a NaN instead of an infinity;
663 // conventionally, this is the next bit down from the QNaN bit.
664 if (APInt::tcIsZero(significand, numParts))
665 APInt::tcSetBit(significand, QNaNBit - 1);
667 // We always have to set the QNaN bit to make it a QNaN.
668 APInt::tcSetBit(significand, QNaNBit);
671 // For x87 extended precision, we want to make a NaN, not a
672 // pseudo-NaN. Maybe we should expose the ability to make
674 if (semantics == &APFloat::x87DoubleExtended)
675 APInt::tcSetBit(significand, QNaNBit + 1);
678 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
680 APFloat value(Sem, uninitialized);
681 value.makeNaN(SNaN, Negative, fill);
686 APFloat::operator=(const APFloat &rhs)
689 if (semantics != rhs.semantics) {
691 initialize(rhs.semantics);
700 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
703 if (semantics != rhs.semantics ||
704 category != rhs.category ||
707 if (category==fcZero || category==fcInfinity)
709 else if (category==fcNormal && exponent!=rhs.exponent)
713 const integerPart* p=significandParts();
714 const integerPart* q=rhs.significandParts();
715 for (; i>0; i--, p++, q++) {
723 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
724 initialize(&ourSemantics);
727 exponent = ourSemantics.precision - 1;
728 significandParts()[0] = value;
729 normalize(rmNearestTiesToEven, lfExactlyZero);
732 APFloat::APFloat(const fltSemantics &ourSemantics) {
733 initialize(&ourSemantics);
738 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
739 // Allocates storage if necessary but does not initialize it.
740 initialize(&ourSemantics);
743 APFloat::APFloat(const fltSemantics &ourSemantics,
744 fltCategory ourCategory, bool negative) {
745 initialize(&ourSemantics);
746 category = ourCategory;
748 if (category == fcNormal)
750 else if (ourCategory == fcNaN)
754 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
755 initialize(&ourSemantics);
756 convertFromString(text, rmNearestTiesToEven);
759 APFloat::APFloat(const APFloat &rhs) {
760 initialize(rhs.semantics);
769 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
770 void APFloat::Profile(FoldingSetNodeID& ID) const {
771 ID.Add(bitcastToAPInt());
775 APFloat::partCount() const
777 return partCountForBits(semantics->precision + 1);
781 APFloat::semanticsPrecision(const fltSemantics &semantics)
783 return semantics.precision;
787 APFloat::significandParts() const
789 return const_cast<APFloat *>(this)->significandParts();
793 APFloat::significandParts()
795 assert(category == fcNormal || category == fcNaN);
798 return significand.parts;
800 return &significand.part;
804 APFloat::zeroSignificand()
807 APInt::tcSet(significandParts(), 0, partCount());
810 /* Increment an fcNormal floating point number's significand. */
812 APFloat::incrementSignificand()
816 carry = APInt::tcIncrement(significandParts(), partCount());
818 /* Our callers should never cause us to overflow. */
823 /* Add the significand of the RHS. Returns the carry flag. */
825 APFloat::addSignificand(const APFloat &rhs)
829 parts = significandParts();
831 assert(semantics == rhs.semantics);
832 assert(exponent == rhs.exponent);
834 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
837 /* Subtract the significand of the RHS with a borrow flag. Returns
840 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
844 parts = significandParts();
846 assert(semantics == rhs.semantics);
847 assert(exponent == rhs.exponent);
849 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
853 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
854 on to the full-precision result of the multiplication. Returns the
857 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
859 unsigned int omsb; // One, not zero, based MSB.
860 unsigned int partsCount, newPartsCount, precision;
861 integerPart *lhsSignificand;
862 integerPart scratch[4];
863 integerPart *fullSignificand;
864 lostFraction lost_fraction;
867 assert(semantics == rhs.semantics);
869 precision = semantics->precision;
870 newPartsCount = partCountForBits(precision * 2);
872 if (newPartsCount > 4)
873 fullSignificand = new integerPart[newPartsCount];
875 fullSignificand = scratch;
877 lhsSignificand = significandParts();
878 partsCount = partCount();
880 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
881 rhs.significandParts(), partsCount, partsCount);
883 lost_fraction = lfExactlyZero;
884 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
885 exponent += rhs.exponent;
888 Significand savedSignificand = significand;
889 const fltSemantics *savedSemantics = semantics;
890 fltSemantics extendedSemantics;
892 unsigned int extendedPrecision;
894 /* Normalize our MSB. */
895 extendedPrecision = precision + precision - 1;
896 if (omsb != extendedPrecision) {
897 APInt::tcShiftLeft(fullSignificand, newPartsCount,
898 extendedPrecision - omsb);
899 exponent -= extendedPrecision - omsb;
902 /* Create new semantics. */
903 extendedSemantics = *semantics;
904 extendedSemantics.precision = extendedPrecision;
906 if (newPartsCount == 1)
907 significand.part = fullSignificand[0];
909 significand.parts = fullSignificand;
910 semantics = &extendedSemantics;
912 APFloat extendedAddend(*addend);
913 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
914 assert(status == opOK);
916 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
918 /* Restore our state. */
919 if (newPartsCount == 1)
920 fullSignificand[0] = significand.part;
921 significand = savedSignificand;
922 semantics = savedSemantics;
924 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
927 exponent -= (precision - 1);
929 if (omsb > precision) {
930 unsigned int bits, significantParts;
933 bits = omsb - precision;
934 significantParts = partCountForBits(omsb);
935 lf = shiftRight(fullSignificand, significantParts, bits);
936 lost_fraction = combineLostFractions(lf, lost_fraction);
940 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
942 if (newPartsCount > 4)
943 delete [] fullSignificand;
945 return lost_fraction;
948 /* Multiply the significands of LHS and RHS to DST. */
950 APFloat::divideSignificand(const APFloat &rhs)
952 unsigned int bit, i, partsCount;
953 const integerPart *rhsSignificand;
954 integerPart *lhsSignificand, *dividend, *divisor;
955 integerPart scratch[4];
956 lostFraction lost_fraction;
958 assert(semantics == rhs.semantics);
960 lhsSignificand = significandParts();
961 rhsSignificand = rhs.significandParts();
962 partsCount = partCount();
965 dividend = new integerPart[partsCount * 2];
969 divisor = dividend + partsCount;
971 /* Copy the dividend and divisor as they will be modified in-place. */
972 for (i = 0; i < partsCount; i++) {
973 dividend[i] = lhsSignificand[i];
974 divisor[i] = rhsSignificand[i];
975 lhsSignificand[i] = 0;
978 exponent -= rhs.exponent;
980 unsigned int precision = semantics->precision;
982 /* Normalize the divisor. */
983 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
986 APInt::tcShiftLeft(divisor, partsCount, bit);
989 /* Normalize the dividend. */
990 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
993 APInt::tcShiftLeft(dividend, partsCount, bit);
996 /* Ensure the dividend >= divisor initially for the loop below.
997 Incidentally, this means that the division loop below is
998 guaranteed to set the integer bit to one. */
999 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1001 APInt::tcShiftLeft(dividend, partsCount, 1);
1002 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1005 /* Long division. */
1006 for (bit = precision; bit; bit -= 1) {
1007 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1008 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1009 APInt::tcSetBit(lhsSignificand, bit - 1);
1012 APInt::tcShiftLeft(dividend, partsCount, 1);
1015 /* Figure out the lost fraction. */
1016 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1019 lost_fraction = lfMoreThanHalf;
1021 lost_fraction = lfExactlyHalf;
1022 else if (APInt::tcIsZero(dividend, partsCount))
1023 lost_fraction = lfExactlyZero;
1025 lost_fraction = lfLessThanHalf;
1030 return lost_fraction;
1034 APFloat::significandMSB() const
1036 return APInt::tcMSB(significandParts(), partCount());
1040 APFloat::significandLSB() const
1042 return APInt::tcLSB(significandParts(), partCount());
1045 /* Note that a zero result is NOT normalized to fcZero. */
1047 APFloat::shiftSignificandRight(unsigned int bits)
1049 /* Our exponent should not overflow. */
1050 assert((exponent_t) (exponent + bits) >= exponent);
1054 return shiftRight(significandParts(), partCount(), bits);
1057 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1059 APFloat::shiftSignificandLeft(unsigned int bits)
1061 assert(bits < semantics->precision);
1064 unsigned int partsCount = partCount();
1066 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1069 assert(!APInt::tcIsZero(significandParts(), partsCount));
1074 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1078 assert(semantics == rhs.semantics);
1079 assert(category == fcNormal);
1080 assert(rhs.category == fcNormal);
1082 compare = exponent - rhs.exponent;
1084 /* If exponents are equal, do an unsigned bignum comparison of the
1087 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1091 return cmpGreaterThan;
1092 else if (compare < 0)
1098 /* Handle overflow. Sign is preserved. We either become infinity or
1099 the largest finite number. */
1101 APFloat::handleOverflow(roundingMode rounding_mode)
1104 if (rounding_mode == rmNearestTiesToEven ||
1105 rounding_mode == rmNearestTiesToAway ||
1106 (rounding_mode == rmTowardPositive && !sign) ||
1107 (rounding_mode == rmTowardNegative && sign)) {
1108 category = fcInfinity;
1109 return (opStatus) (opOverflow | opInexact);
1112 /* Otherwise we become the largest finite number. */
1113 category = fcNormal;
1114 exponent = semantics->maxExponent;
1115 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1116 semantics->precision);
1121 /* Returns TRUE if, when truncating the current number, with BIT the
1122 new LSB, with the given lost fraction and rounding mode, the result
1123 would need to be rounded away from zero (i.e., by increasing the
1124 signficand). This routine must work for fcZero of both signs, and
1125 fcNormal numbers. */
1127 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1128 lostFraction lost_fraction,
1129 unsigned int bit) const
1131 /* NaNs and infinities should not have lost fractions. */
1132 assert(category == fcNormal || category == fcZero);
1134 /* Current callers never pass this so we don't handle it. */
1135 assert(lost_fraction != lfExactlyZero);
1137 switch (rounding_mode) {
1138 case rmNearestTiesToAway:
1139 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1141 case rmNearestTiesToEven:
1142 if (lost_fraction == lfMoreThanHalf)
1145 /* Our zeroes don't have a significand to test. */
1146 if (lost_fraction == lfExactlyHalf && category != fcZero)
1147 return APInt::tcExtractBit(significandParts(), bit);
1154 case rmTowardPositive:
1155 return sign == false;
1157 case rmTowardNegative:
1158 return sign == true;
1160 llvm_unreachable("Invalid rounding mode found");
1164 APFloat::normalize(roundingMode rounding_mode,
1165 lostFraction lost_fraction)
1167 unsigned int omsb; /* One, not zero, based MSB. */
1170 if (category != fcNormal)
1173 /* Before rounding normalize the exponent of fcNormal numbers. */
1174 omsb = significandMSB() + 1;
1177 /* OMSB is numbered from 1. We want to place it in the integer
1178 bit numbered PRECISION if possible, with a compensating change in
1180 exponentChange = omsb - semantics->precision;
1182 /* If the resulting exponent is too high, overflow according to
1183 the rounding mode. */
1184 if (exponent + exponentChange > semantics->maxExponent)
1185 return handleOverflow(rounding_mode);
1187 /* Subnormal numbers have exponent minExponent, and their MSB
1188 is forced based on that. */
1189 if (exponent + exponentChange < semantics->minExponent)
1190 exponentChange = semantics->minExponent - exponent;
1192 /* Shifting left is easy as we don't lose precision. */
1193 if (exponentChange < 0) {
1194 assert(lost_fraction == lfExactlyZero);
1196 shiftSignificandLeft(-exponentChange);
1201 if (exponentChange > 0) {
1204 /* Shift right and capture any new lost fraction. */
1205 lf = shiftSignificandRight(exponentChange);
1207 lost_fraction = combineLostFractions(lf, lost_fraction);
1209 /* Keep OMSB up-to-date. */
1210 if (omsb > (unsigned) exponentChange)
1211 omsb -= exponentChange;
1217 /* Now round the number according to rounding_mode given the lost
1220 /* As specified in IEEE 754, since we do not trap we do not report
1221 underflow for exact results. */
1222 if (lost_fraction == lfExactlyZero) {
1223 /* Canonicalize zeroes. */
1230 /* Increment the significand if we're rounding away from zero. */
1231 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1233 exponent = semantics->minExponent;
1235 incrementSignificand();
1236 omsb = significandMSB() + 1;
1238 /* Did the significand increment overflow? */
1239 if (omsb == (unsigned) semantics->precision + 1) {
1240 /* Renormalize by incrementing the exponent and shifting our
1241 significand right one. However if we already have the
1242 maximum exponent we overflow to infinity. */
1243 if (exponent == semantics->maxExponent) {
1244 category = fcInfinity;
1246 return (opStatus) (opOverflow | opInexact);
1249 shiftSignificandRight(1);
1255 /* The normal case - we were and are not denormal, and any
1256 significand increment above didn't overflow. */
1257 if (omsb == semantics->precision)
1260 /* We have a non-zero denormal. */
1261 assert(omsb < semantics->precision);
1263 /* Canonicalize zeroes. */
1267 /* The fcZero case is a denormal that underflowed to zero. */
1268 return (opStatus) (opUnderflow | opInexact);
1272 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1274 switch (convolve(category, rhs.category)) {
1276 llvm_unreachable(0);
1278 case convolve(fcNaN, fcZero):
1279 case convolve(fcNaN, fcNormal):
1280 case convolve(fcNaN, fcInfinity):
1281 case convolve(fcNaN, fcNaN):
1282 case convolve(fcNormal, fcZero):
1283 case convolve(fcInfinity, fcNormal):
1284 case convolve(fcInfinity, fcZero):
1287 case convolve(fcZero, fcNaN):
1288 case convolve(fcNormal, fcNaN):
1289 case convolve(fcInfinity, fcNaN):
1291 copySignificand(rhs);
1294 case convolve(fcNormal, fcInfinity):
1295 case convolve(fcZero, fcInfinity):
1296 category = fcInfinity;
1297 sign = rhs.sign ^ subtract;
1300 case convolve(fcZero, fcNormal):
1302 sign = rhs.sign ^ subtract;
1305 case convolve(fcZero, fcZero):
1306 /* Sign depends on rounding mode; handled by caller. */
1309 case convolve(fcInfinity, fcInfinity):
1310 /* Differently signed infinities can only be validly
1312 if (((sign ^ rhs.sign)!=0) != subtract) {
1319 case convolve(fcNormal, fcNormal):
1324 /* Add or subtract two normal numbers. */
1326 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1329 lostFraction lost_fraction;
1332 /* Determine if the operation on the absolute values is effectively
1333 an addition or subtraction. */
1334 subtract ^= (sign ^ rhs.sign) ? true : false;
1336 /* Are we bigger exponent-wise than the RHS? */
1337 bits = exponent - rhs.exponent;
1339 /* Subtraction is more subtle than one might naively expect. */
1341 APFloat temp_rhs(rhs);
1345 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1346 lost_fraction = lfExactlyZero;
1347 } else if (bits > 0) {
1348 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1349 shiftSignificandLeft(1);
1352 lost_fraction = shiftSignificandRight(-bits - 1);
1353 temp_rhs.shiftSignificandLeft(1);
1358 carry = temp_rhs.subtractSignificand
1359 (*this, lost_fraction != lfExactlyZero);
1360 copySignificand(temp_rhs);
1363 carry = subtractSignificand
1364 (temp_rhs, lost_fraction != lfExactlyZero);
1367 /* Invert the lost fraction - it was on the RHS and
1369 if (lost_fraction == lfLessThanHalf)
1370 lost_fraction = lfMoreThanHalf;
1371 else if (lost_fraction == lfMoreThanHalf)
1372 lost_fraction = lfLessThanHalf;
1374 /* The code above is intended to ensure that no borrow is
1380 APFloat temp_rhs(rhs);
1382 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1383 carry = addSignificand(temp_rhs);
1385 lost_fraction = shiftSignificandRight(-bits);
1386 carry = addSignificand(rhs);
1389 /* We have a guard bit; generating a carry cannot happen. */
1394 return lost_fraction;
1398 APFloat::multiplySpecials(const APFloat &rhs)
1400 switch (convolve(category, rhs.category)) {
1402 llvm_unreachable(0);
1404 case convolve(fcNaN, fcZero):
1405 case convolve(fcNaN, fcNormal):
1406 case convolve(fcNaN, fcInfinity):
1407 case convolve(fcNaN, fcNaN):
1410 case convolve(fcZero, fcNaN):
1411 case convolve(fcNormal, fcNaN):
1412 case convolve(fcInfinity, fcNaN):
1414 copySignificand(rhs);
1417 case convolve(fcNormal, fcInfinity):
1418 case convolve(fcInfinity, fcNormal):
1419 case convolve(fcInfinity, fcInfinity):
1420 category = fcInfinity;
1423 case convolve(fcZero, fcNormal):
1424 case convolve(fcNormal, fcZero):
1425 case convolve(fcZero, fcZero):
1429 case convolve(fcZero, fcInfinity):
1430 case convolve(fcInfinity, fcZero):
1434 case convolve(fcNormal, fcNormal):
1440 APFloat::divideSpecials(const APFloat &rhs)
1442 switch (convolve(category, rhs.category)) {
1444 llvm_unreachable(0);
1446 case convolve(fcNaN, fcZero):
1447 case convolve(fcNaN, fcNormal):
1448 case convolve(fcNaN, fcInfinity):
1449 case convolve(fcNaN, fcNaN):
1450 case convolve(fcInfinity, fcZero):
1451 case convolve(fcInfinity, fcNormal):
1452 case convolve(fcZero, fcInfinity):
1453 case convolve(fcZero, fcNormal):
1456 case convolve(fcZero, fcNaN):
1457 case convolve(fcNormal, fcNaN):
1458 case convolve(fcInfinity, fcNaN):
1460 copySignificand(rhs);
1463 case convolve(fcNormal, fcInfinity):
1467 case convolve(fcNormal, fcZero):
1468 category = fcInfinity;
1471 case convolve(fcInfinity, fcInfinity):
1472 case convolve(fcZero, fcZero):
1476 case convolve(fcNormal, fcNormal):
1482 APFloat::modSpecials(const APFloat &rhs)
1484 switch (convolve(category, rhs.category)) {
1486 llvm_unreachable(0);
1488 case convolve(fcNaN, fcZero):
1489 case convolve(fcNaN, fcNormal):
1490 case convolve(fcNaN, fcInfinity):
1491 case convolve(fcNaN, fcNaN):
1492 case convolve(fcZero, fcInfinity):
1493 case convolve(fcZero, fcNormal):
1494 case convolve(fcNormal, fcInfinity):
1497 case convolve(fcZero, fcNaN):
1498 case convolve(fcNormal, fcNaN):
1499 case convolve(fcInfinity, fcNaN):
1501 copySignificand(rhs);
1504 case convolve(fcNormal, fcZero):
1505 case convolve(fcInfinity, fcZero):
1506 case convolve(fcInfinity, fcNormal):
1507 case convolve(fcInfinity, fcInfinity):
1508 case convolve(fcZero, fcZero):
1512 case convolve(fcNormal, fcNormal):
1519 APFloat::changeSign()
1521 /* Look mummy, this one's easy. */
1526 APFloat::clearSign()
1528 /* So is this one. */
1533 APFloat::copySign(const APFloat &rhs)
1539 /* Normalized addition or subtraction. */
1541 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1546 fs = addOrSubtractSpecials(rhs, subtract);
1548 /* This return code means it was not a simple case. */
1549 if (fs == opDivByZero) {
1550 lostFraction lost_fraction;
1552 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1553 fs = normalize(rounding_mode, lost_fraction);
1555 /* Can only be zero if we lost no fraction. */
1556 assert(category != fcZero || lost_fraction == lfExactlyZero);
1559 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1560 positive zero unless rounding to minus infinity, except that
1561 adding two like-signed zeroes gives that zero. */
1562 if (category == fcZero) {
1563 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1564 sign = (rounding_mode == rmTowardNegative);
1570 /* Normalized addition. */
1572 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1574 return addOrSubtract(rhs, rounding_mode, false);
1577 /* Normalized subtraction. */
1579 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1581 return addOrSubtract(rhs, rounding_mode, true);
1584 /* Normalized multiply. */
1586 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1591 fs = multiplySpecials(rhs);
1593 if (category == fcNormal) {
1594 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1595 fs = normalize(rounding_mode, lost_fraction);
1596 if (lost_fraction != lfExactlyZero)
1597 fs = (opStatus) (fs | opInexact);
1603 /* Normalized divide. */
1605 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1610 fs = divideSpecials(rhs);
1612 if (category == fcNormal) {
1613 lostFraction lost_fraction = divideSignificand(rhs);
1614 fs = normalize(rounding_mode, lost_fraction);
1615 if (lost_fraction != lfExactlyZero)
1616 fs = (opStatus) (fs | opInexact);
1622 /* Normalized remainder. This is not currently correct in all cases. */
1624 APFloat::remainder(const APFloat &rhs)
1628 unsigned int origSign = sign;
1630 fs = V.divide(rhs, rmNearestTiesToEven);
1631 if (fs == opDivByZero)
1634 int parts = partCount();
1635 integerPart *x = new integerPart[parts];
1637 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1638 rmNearestTiesToEven, &ignored);
1639 if (fs==opInvalidOp)
1642 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1643 rmNearestTiesToEven);
1644 assert(fs==opOK); // should always work
1646 fs = V.multiply(rhs, rmNearestTiesToEven);
1647 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1649 fs = subtract(V, rmNearestTiesToEven);
1650 assert(fs==opOK || fs==opInexact); // likewise
1653 sign = origSign; // IEEE754 requires this
1658 /* Normalized llvm frem (C fmod).
1659 This is not currently correct in all cases. */
1661 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1664 fs = modSpecials(rhs);
1666 if (category == fcNormal && rhs.category == fcNormal) {
1668 unsigned int origSign = sign;
1670 fs = V.divide(rhs, rmNearestTiesToEven);
1671 if (fs == opDivByZero)
1674 int parts = partCount();
1675 integerPart *x = new integerPart[parts];
1677 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1678 rmTowardZero, &ignored);
1679 if (fs==opInvalidOp)
1682 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1683 rmNearestTiesToEven);
1684 assert(fs==opOK); // should always work
1686 fs = V.multiply(rhs, rounding_mode);
1687 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1689 fs = subtract(V, rounding_mode);
1690 assert(fs==opOK || fs==opInexact); // likewise
1693 sign = origSign; // IEEE754 requires this
1699 /* Normalized fused-multiply-add. */
1701 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1702 const APFloat &addend,
1703 roundingMode rounding_mode)
1707 /* Post-multiplication sign, before addition. */
1708 sign ^= multiplicand.sign;
1710 /* If and only if all arguments are normal do we need to do an
1711 extended-precision calculation. */
1712 if (category == fcNormal &&
1713 multiplicand.category == fcNormal &&
1714 addend.category == fcNormal) {
1715 lostFraction lost_fraction;
1717 lost_fraction = multiplySignificand(multiplicand, &addend);
1718 fs = normalize(rounding_mode, lost_fraction);
1719 if (lost_fraction != lfExactlyZero)
1720 fs = (opStatus) (fs | opInexact);
1722 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1723 positive zero unless rounding to minus infinity, except that
1724 adding two like-signed zeroes gives that zero. */
1725 if (category == fcZero && sign != addend.sign)
1726 sign = (rounding_mode == rmTowardNegative);
1728 fs = multiplySpecials(multiplicand);
1730 /* FS can only be opOK or opInvalidOp. There is no more work
1731 to do in the latter case. The IEEE-754R standard says it is
1732 implementation-defined in this case whether, if ADDEND is a
1733 quiet NaN, we raise invalid op; this implementation does so.
1735 If we need to do the addition we can do so with normal
1738 fs = addOrSubtract(addend, rounding_mode, false);
1744 /* Rounding-mode corrrect round to integral value. */
1745 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1748 // If the exponent is large enough, we know that this value is already
1749 // integral, and the arithmetic below would potentially cause it to saturate
1750 // to +/-Inf. Bail out early instead.
1751 if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics))
1754 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1755 // precision of our format, and then subtract it back off again. The choice
1756 // of rounding modes for the addition/subtraction determines the rounding mode
1757 // for our integral rounding as well.
1758 // NOTE: When the input value is negative, we do subtraction followed by
1759 // addition instead.
1760 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1761 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1762 APFloat MagicConstant(*semantics);
1763 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1764 rmNearestTiesToEven);
1765 MagicConstant.copySign(*this);
1770 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1771 bool inputSign = isNegative();
1773 fs = add(MagicConstant, rounding_mode);
1774 if (fs != opOK && fs != opInexact)
1777 fs = subtract(MagicConstant, rounding_mode);
1779 // Restore the input sign.
1780 if (inputSign != isNegative())
1787 /* Comparison requires normalized numbers. */
1789 APFloat::compare(const APFloat &rhs) const
1793 assert(semantics == rhs.semantics);
1795 switch (convolve(category, rhs.category)) {
1797 llvm_unreachable(0);
1799 case convolve(fcNaN, fcZero):
1800 case convolve(fcNaN, fcNormal):
1801 case convolve(fcNaN, fcInfinity):
1802 case convolve(fcNaN, fcNaN):
1803 case convolve(fcZero, fcNaN):
1804 case convolve(fcNormal, fcNaN):
1805 case convolve(fcInfinity, fcNaN):
1806 return cmpUnordered;
1808 case convolve(fcInfinity, fcNormal):
1809 case convolve(fcInfinity, fcZero):
1810 case convolve(fcNormal, fcZero):
1814 return cmpGreaterThan;
1816 case convolve(fcNormal, fcInfinity):
1817 case convolve(fcZero, fcInfinity):
1818 case convolve(fcZero, fcNormal):
1820 return cmpGreaterThan;
1824 case convolve(fcInfinity, fcInfinity):
1825 if (sign == rhs.sign)
1830 return cmpGreaterThan;
1832 case convolve(fcZero, fcZero):
1835 case convolve(fcNormal, fcNormal):
1839 /* Two normal numbers. Do they have the same sign? */
1840 if (sign != rhs.sign) {
1842 result = cmpLessThan;
1844 result = cmpGreaterThan;
1846 /* Compare absolute values; invert result if negative. */
1847 result = compareAbsoluteValue(rhs);
1850 if (result == cmpLessThan)
1851 result = cmpGreaterThan;
1852 else if (result == cmpGreaterThan)
1853 result = cmpLessThan;
1860 /// APFloat::convert - convert a value of one floating point type to another.
1861 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1862 /// records whether the transformation lost information, i.e. whether
1863 /// converting the result back to the original type will produce the
1864 /// original value (this is almost the same as return value==fsOK, but there
1865 /// are edge cases where this is not so).
1868 APFloat::convert(const fltSemantics &toSemantics,
1869 roundingMode rounding_mode, bool *losesInfo)
1871 lostFraction lostFraction;
1872 unsigned int newPartCount, oldPartCount;
1875 const fltSemantics &fromSemantics = *semantics;
1877 lostFraction = lfExactlyZero;
1878 newPartCount = partCountForBits(toSemantics.precision + 1);
1879 oldPartCount = partCount();
1880 shift = toSemantics.precision - fromSemantics.precision;
1882 bool X86SpecialNan = false;
1883 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1884 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1885 (!(*significandParts() & 0x8000000000000000ULL) ||
1886 !(*significandParts() & 0x4000000000000000ULL))) {
1887 // x86 has some unusual NaNs which cannot be represented in any other
1888 // format; note them here.
1889 X86SpecialNan = true;
1892 // If this is a truncation, perform the shift before we narrow the storage.
1893 if (shift < 0 && (category==fcNormal || category==fcNaN))
1894 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1896 // Fix the storage so it can hold to new value.
1897 if (newPartCount > oldPartCount) {
1898 // The new type requires more storage; make it available.
1899 integerPart *newParts;
1900 newParts = new integerPart[newPartCount];
1901 APInt::tcSet(newParts, 0, newPartCount);
1902 if (category==fcNormal || category==fcNaN)
1903 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1905 significand.parts = newParts;
1906 } else if (newPartCount == 1 && oldPartCount != 1) {
1907 // Switch to built-in storage for a single part.
1908 integerPart newPart = 0;
1909 if (category==fcNormal || category==fcNaN)
1910 newPart = significandParts()[0];
1912 significand.part = newPart;
1915 // Now that we have the right storage, switch the semantics.
1916 semantics = &toSemantics;
1918 // If this is an extension, perform the shift now that the storage is
1920 if (shift > 0 && (category==fcNormal || category==fcNaN))
1921 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1923 if (category == fcNormal) {
1924 fs = normalize(rounding_mode, lostFraction);
1925 *losesInfo = (fs != opOK);
1926 } else if (category == fcNaN) {
1927 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
1928 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
1929 // does not give you back the same bits. This is dubious, and we
1930 // don't currently do it. You're really supposed to get
1931 // an invalid operation signal at runtime, but nobody does that.
1941 /* Convert a floating point number to an integer according to the
1942 rounding mode. If the rounded integer value is out of range this
1943 returns an invalid operation exception and the contents of the
1944 destination parts are unspecified. If the rounded value is in
1945 range but the floating point number is not the exact integer, the C
1946 standard doesn't require an inexact exception to be raised. IEEE
1947 854 does require it so we do that.
1949 Note that for conversions to integer type the C standard requires
1950 round-to-zero to always be used. */
1952 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
1954 roundingMode rounding_mode,
1955 bool *isExact) const
1957 lostFraction lost_fraction;
1958 const integerPart *src;
1959 unsigned int dstPartsCount, truncatedBits;
1963 /* Handle the three special cases first. */
1964 if (category == fcInfinity || category == fcNaN)
1967 dstPartsCount = partCountForBits(width);
1969 if (category == fcZero) {
1970 APInt::tcSet(parts, 0, dstPartsCount);
1971 // Negative zero can't be represented as an int.
1976 src = significandParts();
1978 /* Step 1: place our absolute value, with any fraction truncated, in
1981 /* Our absolute value is less than one; truncate everything. */
1982 APInt::tcSet(parts, 0, dstPartsCount);
1983 /* For exponent -1 the integer bit represents .5, look at that.
1984 For smaller exponents leftmost truncated bit is 0. */
1985 truncatedBits = semantics->precision -1U - exponent;
1987 /* We want the most significant (exponent + 1) bits; the rest are
1989 unsigned int bits = exponent + 1U;
1991 /* Hopelessly large in magnitude? */
1995 if (bits < semantics->precision) {
1996 /* We truncate (semantics->precision - bits) bits. */
1997 truncatedBits = semantics->precision - bits;
1998 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2000 /* We want at least as many bits as are available. */
2001 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2002 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2007 /* Step 2: work out any lost fraction, and increment the absolute
2008 value if we would round away from zero. */
2009 if (truncatedBits) {
2010 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2012 if (lost_fraction != lfExactlyZero &&
2013 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2014 if (APInt::tcIncrement(parts, dstPartsCount))
2015 return opInvalidOp; /* Overflow. */
2018 lost_fraction = lfExactlyZero;
2021 /* Step 3: check if we fit in the destination. */
2022 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2026 /* Negative numbers cannot be represented as unsigned. */
2030 /* It takes omsb bits to represent the unsigned integer value.
2031 We lose a bit for the sign, but care is needed as the
2032 maximally negative integer is a special case. */
2033 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2036 /* This case can happen because of rounding. */
2041 APInt::tcNegate (parts, dstPartsCount);
2043 if (omsb >= width + !isSigned)
2047 if (lost_fraction == lfExactlyZero) {
2054 /* Same as convertToSignExtendedInteger, except we provide
2055 deterministic values in case of an invalid operation exception,
2056 namely zero for NaNs and the minimal or maximal value respectively
2057 for underflow or overflow.
2058 The *isExact output tells whether the result is exact, in the sense
2059 that converting it back to the original floating point type produces
2060 the original value. This is almost equivalent to result==opOK,
2061 except for negative zeroes.
2064 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2066 roundingMode rounding_mode, bool *isExact) const
2070 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2073 if (fs == opInvalidOp) {
2074 unsigned int bits, dstPartsCount;
2076 dstPartsCount = partCountForBits(width);
2078 if (category == fcNaN)
2083 bits = width - isSigned;
2085 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2086 if (sign && isSigned)
2087 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2093 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2094 an APSInt, whose initial bit-width and signed-ness are used to determine the
2095 precision of the conversion.
2098 APFloat::convertToInteger(APSInt &result,
2099 roundingMode rounding_mode, bool *isExact) const
2101 unsigned bitWidth = result.getBitWidth();
2102 SmallVector<uint64_t, 4> parts(result.getNumWords());
2103 opStatus status = convertToInteger(
2104 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2105 // Keeps the original signed-ness.
2106 result = APInt(bitWidth, parts);
2110 /* Convert an unsigned integer SRC to a floating point number,
2111 rounding according to ROUNDING_MODE. The sign of the floating
2112 point number is not modified. */
2114 APFloat::convertFromUnsignedParts(const integerPart *src,
2115 unsigned int srcCount,
2116 roundingMode rounding_mode)
2118 unsigned int omsb, precision, dstCount;
2120 lostFraction lost_fraction;
2122 category = fcNormal;
2123 omsb = APInt::tcMSB(src, srcCount) + 1;
2124 dst = significandParts();
2125 dstCount = partCount();
2126 precision = semantics->precision;
2128 /* We want the most significant PRECISION bits of SRC. There may not
2129 be that many; extract what we can. */
2130 if (precision <= omsb) {
2131 exponent = omsb - 1;
2132 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2134 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2136 exponent = precision - 1;
2137 lost_fraction = lfExactlyZero;
2138 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2141 return normalize(rounding_mode, lost_fraction);
2145 APFloat::convertFromAPInt(const APInt &Val,
2147 roundingMode rounding_mode)
2149 unsigned int partCount = Val.getNumWords();
2153 if (isSigned && api.isNegative()) {
2158 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2161 /* Convert a two's complement integer SRC to a floating point number,
2162 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2163 integer is signed, in which case it must be sign-extended. */
2165 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2166 unsigned int srcCount,
2168 roundingMode rounding_mode)
2173 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2176 /* If we're signed and negative negate a copy. */
2178 copy = new integerPart[srcCount];
2179 APInt::tcAssign(copy, src, srcCount);
2180 APInt::tcNegate(copy, srcCount);
2181 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2185 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2191 /* FIXME: should this just take a const APInt reference? */
2193 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2194 unsigned int width, bool isSigned,
2195 roundingMode rounding_mode)
2197 unsigned int partCount = partCountForBits(width);
2198 APInt api = APInt(width, makeArrayRef(parts, partCount));
2201 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2206 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2210 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2212 lostFraction lost_fraction = lfExactlyZero;
2213 integerPart *significand;
2214 unsigned int bitPos, partsCount;
2215 StringRef::iterator dot, firstSignificantDigit;
2219 category = fcNormal;
2221 significand = significandParts();
2222 partsCount = partCount();
2223 bitPos = partsCount * integerPartWidth;
2225 /* Skip leading zeroes and any (hexa)decimal point. */
2226 StringRef::iterator begin = s.begin();
2227 StringRef::iterator end = s.end();
2228 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2229 firstSignificantDigit = p;
2232 integerPart hex_value;
2235 assert(dot == end && "String contains multiple dots");
2242 hex_value = hexDigitValue(*p);
2243 if (hex_value == -1U) {
2252 /* Store the number whilst 4-bit nibbles remain. */
2255 hex_value <<= bitPos % integerPartWidth;
2256 significand[bitPos / integerPartWidth] |= hex_value;
2258 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2259 while (p != end && hexDigitValue(*p) != -1U)
2266 /* Hex floats require an exponent but not a hexadecimal point. */
2267 assert(p != end && "Hex strings require an exponent");
2268 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2269 assert(p != begin && "Significand has no digits");
2270 assert((dot == end || p - begin != 1) && "Significand has no digits");
2272 /* Ignore the exponent if we are zero. */
2273 if (p != firstSignificantDigit) {
2276 /* Implicit hexadecimal point? */
2280 /* Calculate the exponent adjustment implicit in the number of
2281 significant digits. */
2282 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2283 if (expAdjustment < 0)
2285 expAdjustment = expAdjustment * 4 - 1;
2287 /* Adjust for writing the significand starting at the most
2288 significant nibble. */
2289 expAdjustment += semantics->precision;
2290 expAdjustment -= partsCount * integerPartWidth;
2292 /* Adjust for the given exponent. */
2293 exponent = totalExponent(p + 1, end, expAdjustment);
2296 return normalize(rounding_mode, lost_fraction);
2300 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2301 unsigned sigPartCount, int exp,
2302 roundingMode rounding_mode)
2304 unsigned int parts, pow5PartCount;
2305 fltSemantics calcSemantics = { 32767, -32767, 0 };
2306 integerPart pow5Parts[maxPowerOfFiveParts];
2309 isNearest = (rounding_mode == rmNearestTiesToEven ||
2310 rounding_mode == rmNearestTiesToAway);
2312 parts = partCountForBits(semantics->precision + 11);
2314 /* Calculate pow(5, abs(exp)). */
2315 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2317 for (;; parts *= 2) {
2318 opStatus sigStatus, powStatus;
2319 unsigned int excessPrecision, truncatedBits;
2321 calcSemantics.precision = parts * integerPartWidth - 1;
2322 excessPrecision = calcSemantics.precision - semantics->precision;
2323 truncatedBits = excessPrecision;
2325 APFloat decSig(calcSemantics, fcZero, sign);
2326 APFloat pow5(calcSemantics, fcZero, false);
2328 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2329 rmNearestTiesToEven);
2330 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2331 rmNearestTiesToEven);
2332 /* Add exp, as 10^n = 5^n * 2^n. */
2333 decSig.exponent += exp;
2335 lostFraction calcLostFraction;
2336 integerPart HUerr, HUdistance;
2337 unsigned int powHUerr;
2340 /* multiplySignificand leaves the precision-th bit set to 1. */
2341 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2342 powHUerr = powStatus != opOK;
2344 calcLostFraction = decSig.divideSignificand(pow5);
2345 /* Denormal numbers have less precision. */
2346 if (decSig.exponent < semantics->minExponent) {
2347 excessPrecision += (semantics->minExponent - decSig.exponent);
2348 truncatedBits = excessPrecision;
2349 if (excessPrecision > calcSemantics.precision)
2350 excessPrecision = calcSemantics.precision;
2352 /* Extra half-ulp lost in reciprocal of exponent. */
2353 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2356 /* Both multiplySignificand and divideSignificand return the
2357 result with the integer bit set. */
2358 assert(APInt::tcExtractBit
2359 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2361 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2363 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2364 excessPrecision, isNearest);
2366 /* Are we guaranteed to round correctly if we truncate? */
2367 if (HUdistance >= HUerr) {
2368 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2369 calcSemantics.precision - excessPrecision,
2371 /* Take the exponent of decSig. If we tcExtract-ed less bits
2372 above we must adjust our exponent to compensate for the
2373 implicit right shift. */
2374 exponent = (decSig.exponent + semantics->precision
2375 - (calcSemantics.precision - excessPrecision));
2376 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2379 return normalize(rounding_mode, calcLostFraction);
2385 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2390 /* Scan the text. */
2391 StringRef::iterator p = str.begin();
2392 interpretDecimal(p, str.end(), &D);
2394 /* Handle the quick cases. First the case of no significant digits,
2395 i.e. zero, and then exponents that are obviously too large or too
2396 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2397 definitely overflows if
2399 (exp - 1) * L >= maxExponent
2401 and definitely underflows to zero where
2403 (exp + 1) * L <= minExponent - precision
2405 With integer arithmetic the tightest bounds for L are
2407 93/28 < L < 196/59 [ numerator <= 256 ]
2408 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2411 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2415 /* Check whether the normalized exponent is high enough to overflow
2416 max during the log-rebasing in the max-exponent check below. */
2417 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2418 fs = handleOverflow(rounding_mode);
2420 /* If it wasn't, then it also wasn't high enough to overflow max
2421 during the log-rebasing in the min-exponent check. Check that it
2422 won't overflow min in either check, then perform the min-exponent
2424 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2425 (D.normalizedExponent + 1) * 28738 <=
2426 8651 * (semantics->minExponent - (int) semantics->precision)) {
2427 /* Underflow to zero and round. */
2429 fs = normalize(rounding_mode, lfLessThanHalf);
2431 /* We can finally safely perform the max-exponent check. */
2432 } else if ((D.normalizedExponent - 1) * 42039
2433 >= 12655 * semantics->maxExponent) {
2434 /* Overflow and round. */
2435 fs = handleOverflow(rounding_mode);
2437 integerPart *decSignificand;
2438 unsigned int partCount;
2440 /* A tight upper bound on number of bits required to hold an
2441 N-digit decimal integer is N * 196 / 59. Allocate enough space
2442 to hold the full significand, and an extra part required by
2444 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2445 partCount = partCountForBits(1 + 196 * partCount / 59);
2446 decSignificand = new integerPart[partCount + 1];
2449 /* Convert to binary efficiently - we do almost all multiplication
2450 in an integerPart. When this would overflow do we do a single
2451 bignum multiplication, and then revert again to multiplication
2452 in an integerPart. */
2454 integerPart decValue, val, multiplier;
2462 if (p == str.end()) {
2466 decValue = decDigitValue(*p++);
2467 assert(decValue < 10U && "Invalid character in significand");
2469 val = val * 10 + decValue;
2470 /* The maximum number that can be multiplied by ten with any
2471 digit added without overflowing an integerPart. */
2472 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2474 /* Multiply out the current part. */
2475 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2476 partCount, partCount + 1, false);
2478 /* If we used another part (likely but not guaranteed), increase
2480 if (decSignificand[partCount])
2482 } while (p <= D.lastSigDigit);
2484 category = fcNormal;
2485 fs = roundSignificandWithExponent(decSignificand, partCount,
2486 D.exponent, rounding_mode);
2488 delete [] decSignificand;
2495 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2497 assert(!str.empty() && "Invalid string length");
2499 /* Handle a leading minus sign. */
2500 StringRef::iterator p = str.begin();
2501 size_t slen = str.size();
2502 sign = *p == '-' ? 1 : 0;
2503 if (*p == '-' || *p == '+') {
2506 assert(slen && "String has no digits");
2509 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2510 assert(slen - 2 && "Invalid string");
2511 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2515 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2518 /* Write out a hexadecimal representation of the floating point value
2519 to DST, which must be of sufficient size, in the C99 form
2520 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2521 excluding the terminating NUL.
2523 If UPPERCASE, the output is in upper case, otherwise in lower case.
2525 HEXDIGITS digits appear altogether, rounding the value if
2526 necessary. If HEXDIGITS is 0, the minimal precision to display the
2527 number precisely is used instead. If nothing would appear after
2528 the decimal point it is suppressed.
2530 The decimal exponent is always printed and has at least one digit.
2531 Zero values display an exponent of zero. Infinities and NaNs
2532 appear as "infinity" or "nan" respectively.
2534 The above rules are as specified by C99. There is ambiguity about
2535 what the leading hexadecimal digit should be. This implementation
2536 uses whatever is necessary so that the exponent is displayed as
2537 stored. This implies the exponent will fall within the IEEE format
2538 range, and the leading hexadecimal digit will be 0 (for denormals),
2539 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2540 any other digits zero).
2543 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2544 bool upperCase, roundingMode rounding_mode) const
2554 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2555 dst += sizeof infinityL - 1;
2559 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2560 dst += sizeof NaNU - 1;
2565 *dst++ = upperCase ? 'X': 'x';
2567 if (hexDigits > 1) {
2569 memset (dst, '0', hexDigits - 1);
2570 dst += hexDigits - 1;
2572 *dst++ = upperCase ? 'P': 'p';
2577 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2583 return static_cast<unsigned int>(dst - p);
2586 /* Does the hard work of outputting the correctly rounded hexadecimal
2587 form of a normal floating point number with the specified number of
2588 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2589 digits necessary to print the value precisely is output. */
2591 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2593 roundingMode rounding_mode) const
2595 unsigned int count, valueBits, shift, partsCount, outputDigits;
2596 const char *hexDigitChars;
2597 const integerPart *significand;
2602 *dst++ = upperCase ? 'X': 'x';
2605 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2607 significand = significandParts();
2608 partsCount = partCount();
2610 /* +3 because the first digit only uses the single integer bit, so
2611 we have 3 virtual zero most-significant-bits. */
2612 valueBits = semantics->precision + 3;
2613 shift = integerPartWidth - valueBits % integerPartWidth;
2615 /* The natural number of digits required ignoring trailing
2616 insignificant zeroes. */
2617 outputDigits = (valueBits - significandLSB () + 3) / 4;
2619 /* hexDigits of zero means use the required number for the
2620 precision. Otherwise, see if we are truncating. If we are,
2621 find out if we need to round away from zero. */
2623 if (hexDigits < outputDigits) {
2624 /* We are dropping non-zero bits, so need to check how to round.
2625 "bits" is the number of dropped bits. */
2627 lostFraction fraction;
2629 bits = valueBits - hexDigits * 4;
2630 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2631 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2633 outputDigits = hexDigits;
2636 /* Write the digits consecutively, and start writing in the location
2637 of the hexadecimal point. We move the most significant digit
2638 left and add the hexadecimal point later. */
2641 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2643 while (outputDigits && count) {
2646 /* Put the most significant integerPartWidth bits in "part". */
2647 if (--count == partsCount)
2648 part = 0; /* An imaginary higher zero part. */
2650 part = significand[count] << shift;
2653 part |= significand[count - 1] >> (integerPartWidth - shift);
2655 /* Convert as much of "part" to hexdigits as we can. */
2656 unsigned int curDigits = integerPartWidth / 4;
2658 if (curDigits > outputDigits)
2659 curDigits = outputDigits;
2660 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2661 outputDigits -= curDigits;
2667 /* Note that hexDigitChars has a trailing '0'. */
2670 *q = hexDigitChars[hexDigitValue (*q) + 1];
2671 } while (*q == '0');
2674 /* Add trailing zeroes. */
2675 memset (dst, '0', outputDigits);
2676 dst += outputDigits;
2679 /* Move the most significant digit to before the point, and if there
2680 is something after the decimal point add it. This must come
2681 after rounding above. */
2688 /* Finally output the exponent. */
2689 *dst++ = upperCase ? 'P': 'p';
2691 return writeSignedDecimal (dst, exponent);
2694 hash_code llvm::hash_value(const APFloat &Arg) {
2695 if (Arg.category != APFloat::fcNormal)
2696 return hash_combine((uint8_t)Arg.category,
2697 // NaN has no sign, fix it at zero.
2698 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2699 Arg.semantics->precision);
2701 // Normal floats need their exponent and significand hashed.
2702 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2703 Arg.semantics->precision, Arg.exponent,
2705 Arg.significandParts(),
2706 Arg.significandParts() + Arg.partCount()));
2709 // Conversion from APFloat to/from host float/double. It may eventually be
2710 // possible to eliminate these and have everybody deal with APFloats, but that
2711 // will take a while. This approach will not easily extend to long double.
2712 // Current implementation requires integerPartWidth==64, which is correct at
2713 // the moment but could be made more general.
2715 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2716 // the actual IEEE respresentations. We compensate for that here.
2719 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2721 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2722 assert(partCount()==2);
2724 uint64_t myexponent, mysignificand;
2726 if (category==fcNormal) {
2727 myexponent = exponent+16383; //bias
2728 mysignificand = significandParts()[0];
2729 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2730 myexponent = 0; // denormal
2731 } else if (category==fcZero) {
2734 } else if (category==fcInfinity) {
2735 myexponent = 0x7fff;
2736 mysignificand = 0x8000000000000000ULL;
2738 assert(category == fcNaN && "Unknown category");
2739 myexponent = 0x7fff;
2740 mysignificand = significandParts()[0];
2744 words[0] = mysignificand;
2745 words[1] = ((uint64_t)(sign & 1) << 15) |
2746 (myexponent & 0x7fffLL);
2747 return APInt(80, words);
2751 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2753 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2754 assert(partCount()==2);
2760 // Convert number to double. To avoid spurious underflows, we re-
2761 // normalize against the "double" minExponent first, and only *then*
2762 // truncate the mantissa. The result of that second conversion
2763 // may be inexact, but should never underflow.
2764 // Declare fltSemantics before APFloat that uses it (and
2765 // saves pointer to it) to ensure correct destruction order.
2766 fltSemantics extendedSemantics = *semantics;
2767 extendedSemantics.minExponent = IEEEdouble.minExponent;
2768 APFloat extended(*this);
2769 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2770 assert(fs == opOK && !losesInfo);
2773 APFloat u(extended);
2774 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2775 assert(fs == opOK || fs == opInexact);
2777 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2779 // If conversion was exact or resulted in a special case, we're done;
2780 // just set the second double to zero. Otherwise, re-convert back to
2781 // the extended format and compute the difference. This now should
2782 // convert exactly to double.
2783 if (u.category == fcNormal && losesInfo) {
2784 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2785 assert(fs == opOK && !losesInfo);
2788 APFloat v(extended);
2789 v.subtract(u, rmNearestTiesToEven);
2790 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2791 assert(fs == opOK && !losesInfo);
2793 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2798 return APInt(128, words);
2802 APFloat::convertQuadrupleAPFloatToAPInt() const
2804 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2805 assert(partCount()==2);
2807 uint64_t myexponent, mysignificand, mysignificand2;
2809 if (category==fcNormal) {
2810 myexponent = exponent+16383; //bias
2811 mysignificand = significandParts()[0];
2812 mysignificand2 = significandParts()[1];
2813 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2814 myexponent = 0; // denormal
2815 } else if (category==fcZero) {
2817 mysignificand = mysignificand2 = 0;
2818 } else if (category==fcInfinity) {
2819 myexponent = 0x7fff;
2820 mysignificand = mysignificand2 = 0;
2822 assert(category == fcNaN && "Unknown category!");
2823 myexponent = 0x7fff;
2824 mysignificand = significandParts()[0];
2825 mysignificand2 = significandParts()[1];
2829 words[0] = mysignificand;
2830 words[1] = ((uint64_t)(sign & 1) << 63) |
2831 ((myexponent & 0x7fff) << 48) |
2832 (mysignificand2 & 0xffffffffffffLL);
2834 return APInt(128, words);
2838 APFloat::convertDoubleAPFloatToAPInt() const
2840 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2841 assert(partCount()==1);
2843 uint64_t myexponent, mysignificand;
2845 if (category==fcNormal) {
2846 myexponent = exponent+1023; //bias
2847 mysignificand = *significandParts();
2848 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2849 myexponent = 0; // denormal
2850 } else if (category==fcZero) {
2853 } else if (category==fcInfinity) {
2857 assert(category == fcNaN && "Unknown category!");
2859 mysignificand = *significandParts();
2862 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2863 ((myexponent & 0x7ff) << 52) |
2864 (mysignificand & 0xfffffffffffffLL))));
2868 APFloat::convertFloatAPFloatToAPInt() const
2870 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2871 assert(partCount()==1);
2873 uint32_t myexponent, mysignificand;
2875 if (category==fcNormal) {
2876 myexponent = exponent+127; //bias
2877 mysignificand = (uint32_t)*significandParts();
2878 if (myexponent == 1 && !(mysignificand & 0x800000))
2879 myexponent = 0; // denormal
2880 } else if (category==fcZero) {
2883 } else if (category==fcInfinity) {
2887 assert(category == fcNaN && "Unknown category!");
2889 mysignificand = (uint32_t)*significandParts();
2892 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2893 (mysignificand & 0x7fffff)));
2897 APFloat::convertHalfAPFloatToAPInt() const
2899 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
2900 assert(partCount()==1);
2902 uint32_t myexponent, mysignificand;
2904 if (category==fcNormal) {
2905 myexponent = exponent+15; //bias
2906 mysignificand = (uint32_t)*significandParts();
2907 if (myexponent == 1 && !(mysignificand & 0x400))
2908 myexponent = 0; // denormal
2909 } else if (category==fcZero) {
2912 } else if (category==fcInfinity) {
2916 assert(category == fcNaN && "Unknown category!");
2918 mysignificand = (uint32_t)*significandParts();
2921 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2922 (mysignificand & 0x3ff)));
2925 // This function creates an APInt that is just a bit map of the floating
2926 // point constant as it would appear in memory. It is not a conversion,
2927 // and treating the result as a normal integer is unlikely to be useful.
2930 APFloat::bitcastToAPInt() const
2932 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
2933 return convertHalfAPFloatToAPInt();
2935 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
2936 return convertFloatAPFloatToAPInt();
2938 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
2939 return convertDoubleAPFloatToAPInt();
2941 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
2942 return convertQuadrupleAPFloatToAPInt();
2944 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
2945 return convertPPCDoubleDoubleAPFloatToAPInt();
2947 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
2949 return convertF80LongDoubleAPFloatToAPInt();
2953 APFloat::convertToFloat() const
2955 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
2956 "Float semantics are not IEEEsingle");
2957 APInt api = bitcastToAPInt();
2958 return api.bitsToFloat();
2962 APFloat::convertToDouble() const
2964 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
2965 "Float semantics are not IEEEdouble");
2966 APInt api = bitcastToAPInt();
2967 return api.bitsToDouble();
2970 /// Integer bit is explicit in this format. Intel hardware (387 and later)
2971 /// does not support these bit patterns:
2972 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
2973 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
2974 /// exponent = 0, integer bit 1 ("pseudodenormal")
2975 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
2976 /// At the moment, the first two are treated as NaNs, the second two as Normal.
2978 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
2980 assert(api.getBitWidth()==80);
2981 uint64_t i1 = api.getRawData()[0];
2982 uint64_t i2 = api.getRawData()[1];
2983 uint64_t myexponent = (i2 & 0x7fff);
2984 uint64_t mysignificand = i1;
2986 initialize(&APFloat::x87DoubleExtended);
2987 assert(partCount()==2);
2989 sign = static_cast<unsigned int>(i2>>15);
2990 if (myexponent==0 && mysignificand==0) {
2991 // exponent, significand meaningless
2993 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
2994 // exponent, significand meaningless
2995 category = fcInfinity;
2996 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
2997 // exponent meaningless
2999 significandParts()[0] = mysignificand;
3000 significandParts()[1] = 0;
3002 category = fcNormal;
3003 exponent = myexponent - 16383;
3004 significandParts()[0] = mysignificand;
3005 significandParts()[1] = 0;
3006 if (myexponent==0) // denormal
3012 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3014 assert(api.getBitWidth()==128);
3015 uint64_t i1 = api.getRawData()[0];
3016 uint64_t i2 = api.getRawData()[1];
3020 // Get the first double and convert to our format.
3021 initFromDoubleAPInt(APInt(64, i1));
3022 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3023 assert(fs == opOK && !losesInfo);
3026 // Unless we have a special case, add in second double.
3027 if (category == fcNormal) {
3028 APFloat v(APInt(64, i2));
3029 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3030 assert(fs == opOK && !losesInfo);
3033 add(v, rmNearestTiesToEven);
3038 APFloat::initFromQuadrupleAPInt(const APInt &api)
3040 assert(api.getBitWidth()==128);
3041 uint64_t i1 = api.getRawData()[0];
3042 uint64_t i2 = api.getRawData()[1];
3043 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3044 uint64_t mysignificand = i1;
3045 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3047 initialize(&APFloat::IEEEquad);
3048 assert(partCount()==2);
3050 sign = static_cast<unsigned int>(i2>>63);
3051 if (myexponent==0 &&
3052 (mysignificand==0 && mysignificand2==0)) {
3053 // exponent, significand meaningless
3055 } else if (myexponent==0x7fff &&
3056 (mysignificand==0 && mysignificand2==0)) {
3057 // exponent, significand meaningless
3058 category = fcInfinity;
3059 } else if (myexponent==0x7fff &&
3060 (mysignificand!=0 || mysignificand2 !=0)) {
3061 // exponent meaningless
3063 significandParts()[0] = mysignificand;
3064 significandParts()[1] = mysignificand2;
3066 category = fcNormal;
3067 exponent = myexponent - 16383;
3068 significandParts()[0] = mysignificand;
3069 significandParts()[1] = mysignificand2;
3070 if (myexponent==0) // denormal
3073 significandParts()[1] |= 0x1000000000000LL; // integer bit
3078 APFloat::initFromDoubleAPInt(const APInt &api)
3080 assert(api.getBitWidth()==64);
3081 uint64_t i = *api.getRawData();
3082 uint64_t myexponent = (i >> 52) & 0x7ff;
3083 uint64_t mysignificand = i & 0xfffffffffffffLL;
3085 initialize(&APFloat::IEEEdouble);
3086 assert(partCount()==1);
3088 sign = static_cast<unsigned int>(i>>63);
3089 if (myexponent==0 && mysignificand==0) {
3090 // exponent, significand meaningless
3092 } else if (myexponent==0x7ff && mysignificand==0) {
3093 // exponent, significand meaningless
3094 category = fcInfinity;
3095 } else if (myexponent==0x7ff && mysignificand!=0) {
3096 // exponent meaningless
3098 *significandParts() = mysignificand;
3100 category = fcNormal;
3101 exponent = myexponent - 1023;
3102 *significandParts() = mysignificand;
3103 if (myexponent==0) // denormal
3106 *significandParts() |= 0x10000000000000LL; // integer bit
3111 APFloat::initFromFloatAPInt(const APInt & api)
3113 assert(api.getBitWidth()==32);
3114 uint32_t i = (uint32_t)*api.getRawData();
3115 uint32_t myexponent = (i >> 23) & 0xff;
3116 uint32_t mysignificand = i & 0x7fffff;
3118 initialize(&APFloat::IEEEsingle);
3119 assert(partCount()==1);
3122 if (myexponent==0 && mysignificand==0) {
3123 // exponent, significand meaningless
3125 } else if (myexponent==0xff && mysignificand==0) {
3126 // exponent, significand meaningless
3127 category = fcInfinity;
3128 } else if (myexponent==0xff && mysignificand!=0) {
3129 // sign, exponent, significand meaningless
3131 *significandParts() = mysignificand;
3133 category = fcNormal;
3134 exponent = myexponent - 127; //bias
3135 *significandParts() = mysignificand;
3136 if (myexponent==0) // denormal
3139 *significandParts() |= 0x800000; // integer bit
3144 APFloat::initFromHalfAPInt(const APInt & api)
3146 assert(api.getBitWidth()==16);
3147 uint32_t i = (uint32_t)*api.getRawData();
3148 uint32_t myexponent = (i >> 10) & 0x1f;
3149 uint32_t mysignificand = i & 0x3ff;
3151 initialize(&APFloat::IEEEhalf);
3152 assert(partCount()==1);
3155 if (myexponent==0 && mysignificand==0) {
3156 // exponent, significand meaningless
3158 } else if (myexponent==0x1f && mysignificand==0) {
3159 // exponent, significand meaningless
3160 category = fcInfinity;
3161 } else if (myexponent==0x1f && mysignificand!=0) {
3162 // sign, exponent, significand meaningless
3164 *significandParts() = mysignificand;
3166 category = fcNormal;
3167 exponent = myexponent - 15; //bias
3168 *significandParts() = mysignificand;
3169 if (myexponent==0) // denormal
3172 *significandParts() |= 0x400; // integer bit
3176 /// Treat api as containing the bits of a floating point number. Currently
3177 /// we infer the floating point type from the size of the APInt. The
3178 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3179 /// when the size is anything else).
3181 APFloat::initFromAPInt(const APInt& api, bool isIEEE)
3183 if (api.getBitWidth() == 16)
3184 return initFromHalfAPInt(api);
3185 else if (api.getBitWidth() == 32)
3186 return initFromFloatAPInt(api);
3187 else if (api.getBitWidth()==64)
3188 return initFromDoubleAPInt(api);
3189 else if (api.getBitWidth()==80)
3190 return initFromF80LongDoubleAPInt(api);
3191 else if (api.getBitWidth()==128)
3193 initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api));
3195 llvm_unreachable(0);
3199 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3201 return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE);
3204 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3205 APFloat Val(Sem, fcNormal, Negative);
3207 // We want (in interchange format):
3208 // sign = {Negative}
3210 // significand = 1..1
3212 Val.exponent = Sem.maxExponent; // unbiased
3214 // 1-initialize all bits....
3215 Val.zeroSignificand();
3216 integerPart *significand = Val.significandParts();
3217 unsigned N = partCountForBits(Sem.precision);
3218 for (unsigned i = 0; i != N; ++i)
3219 significand[i] = ~((integerPart) 0);
3221 // ...and then clear the top bits for internal consistency.
3222 if (Sem.precision % integerPartWidth != 0)
3224 (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1;
3229 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3230 APFloat Val(Sem, fcNormal, Negative);
3232 // We want (in interchange format):
3233 // sign = {Negative}
3235 // significand = 0..01
3237 Val.exponent = Sem.minExponent; // unbiased
3238 Val.zeroSignificand();
3239 Val.significandParts()[0] = 1;
3243 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3244 APFloat Val(Sem, fcNormal, Negative);
3246 // We want (in interchange format):
3247 // sign = {Negative}
3249 // significand = 10..0
3251 Val.exponent = Sem.minExponent;
3252 Val.zeroSignificand();
3253 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3254 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3259 APFloat::APFloat(const APInt& api, bool isIEEE) {
3260 initFromAPInt(api, isIEEE);
3263 APFloat::APFloat(float f) {
3264 initFromAPInt(APInt::floatToBits(f));
3267 APFloat::APFloat(double d) {
3268 initFromAPInt(APInt::doubleToBits(d));
3272 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3273 Buffer.append(Str.begin(), Str.end());
3276 /// Removes data from the given significand until it is no more
3277 /// precise than is required for the desired precision.
3278 void AdjustToPrecision(APInt &significand,
3279 int &exp, unsigned FormatPrecision) {
3280 unsigned bits = significand.getActiveBits();
3282 // 196/59 is a very slight overestimate of lg_2(10).
3283 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3285 if (bits <= bitsRequired) return;
3287 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3288 if (!tensRemovable) return;
3290 exp += tensRemovable;
3292 APInt divisor(significand.getBitWidth(), 1);
3293 APInt powten(significand.getBitWidth(), 10);
3295 if (tensRemovable & 1)
3297 tensRemovable >>= 1;
3298 if (!tensRemovable) break;
3302 significand = significand.udiv(divisor);
3304 // Truncate the significand down to its active bit count, but
3305 // don't try to drop below 32.
3306 unsigned newPrecision = std::max(32U, significand.getActiveBits());
3307 significand = significand.trunc(newPrecision);
3311 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3312 int &exp, unsigned FormatPrecision) {
3313 unsigned N = buffer.size();
3314 if (N <= FormatPrecision) return;
3316 // The most significant figures are the last ones in the buffer.
3317 unsigned FirstSignificant = N - FormatPrecision;
3320 // FIXME: this probably shouldn't use 'round half up'.
3322 // Rounding down is just a truncation, except we also want to drop
3323 // trailing zeros from the new result.
3324 if (buffer[FirstSignificant - 1] < '5') {
3325 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3328 exp += FirstSignificant;
3329 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3333 // Rounding up requires a decimal add-with-carry. If we continue
3334 // the carry, the newly-introduced zeros will just be truncated.
3335 for (unsigned I = FirstSignificant; I != N; ++I) {
3336 if (buffer[I] == '9') {
3344 // If we carried through, we have exactly one digit of precision.
3345 if (FirstSignificant == N) {
3346 exp += FirstSignificant;
3348 buffer.push_back('1');
3352 exp += FirstSignificant;
3353 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3357 void APFloat::toString(SmallVectorImpl<char> &Str,
3358 unsigned FormatPrecision,
3359 unsigned FormatMaxPadding) const {
3363 return append(Str, "-Inf");
3365 return append(Str, "+Inf");
3367 case fcNaN: return append(Str, "NaN");
3373 if (!FormatMaxPadding)
3374 append(Str, "0.0E+0");
3386 // Decompose the number into an APInt and an exponent.
3387 int exp = exponent - ((int) semantics->precision - 1);
3388 APInt significand(semantics->precision,
3389 makeArrayRef(significandParts(),
3390 partCountForBits(semantics->precision)));
3392 // Set FormatPrecision if zero. We want to do this before we
3393 // truncate trailing zeros, as those are part of the precision.
3394 if (!FormatPrecision) {
3395 // It's an interesting question whether to use the nominal
3396 // precision or the active precision here for denormals.
3398 // FormatPrecision = ceil(significandBits / lg_2(10))
3399 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3402 // Ignore trailing binary zeros.
3403 int trailingZeros = significand.countTrailingZeros();
3404 exp += trailingZeros;
3405 significand = significand.lshr(trailingZeros);
3407 // Change the exponent from 2^e to 10^e.
3410 } else if (exp > 0) {
3412 significand = significand.zext(semantics->precision + exp);
3413 significand <<= exp;
3415 } else { /* exp < 0 */
3418 // We transform this using the identity:
3419 // (N)(2^-e) == (N)(5^e)(10^-e)
3420 // This means we have to multiply N (the significand) by 5^e.
3421 // To avoid overflow, we have to operate on numbers large
3422 // enough to store N * 5^e:
3423 // log2(N * 5^e) == log2(N) + e * log2(5)
3424 // <= semantics->precision + e * 137 / 59
3425 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3427 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3429 // Multiply significand by 5^e.
3430 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3431 significand = significand.zext(precision);
3432 APInt five_to_the_i(precision, 5);
3434 if (texp & 1) significand *= five_to_the_i;
3438 five_to_the_i *= five_to_the_i;
3442 AdjustToPrecision(significand, exp, FormatPrecision);
3444 llvm::SmallVector<char, 256> buffer;
3447 unsigned precision = significand.getBitWidth();
3448 APInt ten(precision, 10);
3449 APInt digit(precision, 0);
3451 bool inTrail = true;
3452 while (significand != 0) {
3453 // digit <- significand % 10
3454 // significand <- significand / 10
3455 APInt::udivrem(significand, ten, significand, digit);
3457 unsigned d = digit.getZExtValue();
3459 // Drop trailing zeros.
3460 if (inTrail && !d) exp++;
3462 buffer.push_back((char) ('0' + d));
3467 assert(!buffer.empty() && "no characters in buffer!");
3469 // Drop down to FormatPrecision.
3470 // TODO: don't do more precise calculations above than are required.
3471 AdjustToPrecision(buffer, exp, FormatPrecision);
3473 unsigned NDigits = buffer.size();
3475 // Check whether we should use scientific notation.
3476 bool FormatScientific;
3477 if (!FormatMaxPadding)
3478 FormatScientific = true;
3483 // But we shouldn't make the number look more precise than it is.
3484 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3485 NDigits + (unsigned) exp > FormatPrecision);
3487 // Power of the most significant digit.
3488 int MSD = exp + (int) (NDigits - 1);
3491 FormatScientific = false;
3493 // 765e-5 == 0.00765
3495 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3500 // Scientific formatting is pretty straightforward.
3501 if (FormatScientific) {
3502 exp += (NDigits - 1);
3504 Str.push_back(buffer[NDigits-1]);
3509 for (unsigned I = 1; I != NDigits; ++I)
3510 Str.push_back(buffer[NDigits-1-I]);
3513 Str.push_back(exp >= 0 ? '+' : '-');
3514 if (exp < 0) exp = -exp;
3515 SmallVector<char, 6> expbuf;
3517 expbuf.push_back((char) ('0' + (exp % 10)));
3520 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3521 Str.push_back(expbuf[E-1-I]);
3525 // Non-scientific, positive exponents.
3527 for (unsigned I = 0; I != NDigits; ++I)
3528 Str.push_back(buffer[NDigits-1-I]);
3529 for (unsigned I = 0; I != (unsigned) exp; ++I)
3534 // Non-scientific, negative exponents.
3536 // The number of digits to the left of the decimal point.
3537 int NWholeDigits = exp + (int) NDigits;
3540 if (NWholeDigits > 0) {
3541 for (; I != (unsigned) NWholeDigits; ++I)
3542 Str.push_back(buffer[NDigits-I-1]);
3545 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3549 for (unsigned Z = 1; Z != NZeros; ++Z)
3553 for (; I != NDigits; ++I)
3554 Str.push_back(buffer[NDigits-I-1]);
3557 bool APFloat::getExactInverse(APFloat *inv) const {
3558 // Special floats and denormals have no exact inverse.
3559 if (category != fcNormal)
3562 // Check that the number is a power of two by making sure that only the
3563 // integer bit is set in the significand.
3564 if (significandLSB() != semantics->precision - 1)
3568 APFloat reciprocal(*semantics, 1ULL);
3569 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3572 // Avoid multiplication with a denormal, it is not safe on all platforms and
3573 // may be slower than a normal division.
3574 if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision)
3577 assert(reciprocal.category == fcNormal &&
3578 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);