1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/MathExtras.h"
28 /// A macro used to combine two fcCategory enums into one key which can be used
29 /// in a switch statement to classify how the interaction of two APFloat's
30 /// categories affects an operation.
32 /// TODO: If clang source code is ever allowed to use constexpr in its own
33 /// codebase, change this into a static inline function.
34 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
36 /* Assumed in hexadecimal significand parsing, and conversion to
37 hexadecimal strings. */
38 #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
39 COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0);
43 /* Represents floating point arithmetic semantics. */
45 /* The largest E such that 2^E is representable; this matches the
46 definition of IEEE 754. */
47 APFloat::ExponentType maxExponent;
49 /* The smallest E such that 2^E is a normalized number; this
50 matches the definition of IEEE 754. */
51 APFloat::ExponentType minExponent;
53 /* Number of bits in the significand. This includes the integer
55 unsigned int precision;
58 const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 };
59 const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 };
60 const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 };
61 const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 };
62 const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 };
63 const fltSemantics APFloat::Bogus = { 0, 0, 0 };
65 /* The PowerPC format consists of two doubles. It does not map cleanly
66 onto the usual format above. It is approximated using twice the
67 mantissa bits. Note that for exponents near the double minimum,
68 we no longer can represent the full 106 mantissa bits, so those
69 will be treated as denormal numbers.
71 FIXME: While this approximation is equivalent to what GCC uses for
72 compile-time arithmetic on PPC double-double numbers, it is not able
73 to represent all possible values held by a PPC double-double number,
74 for example: (long double) 1.0 + (long double) 0x1p-106
75 Should this be replaced by a full emulation of PPC double-double? */
76 const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 };
78 /* A tight upper bound on number of parts required to hold the value
81 power * 815 / (351 * integerPartWidth) + 1
83 However, whilst the result may require only this many parts,
84 because we are multiplying two values to get it, the
85 multiplication may require an extra part with the excess part
86 being zero (consider the trivial case of 1 * 1, tcFullMultiply
87 requires two parts to hold the single-part result). So we add an
88 extra one to guarantee enough space whilst multiplying. */
89 const unsigned int maxExponent = 16383;
90 const unsigned int maxPrecision = 113;
91 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
92 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
93 / (351 * integerPartWidth));
96 /* A bunch of private, handy routines. */
98 static inline unsigned int
99 partCountForBits(unsigned int bits)
101 return ((bits) + integerPartWidth - 1) / integerPartWidth;
104 /* Returns 0U-9U. Return values >= 10U are not digits. */
105 static inline unsigned int
106 decDigitValue(unsigned int c)
111 /* Return the value of a decimal exponent of the form
114 If the exponent overflows, returns a large exponent with the
117 readExponent(StringRef::iterator begin, StringRef::iterator end)
120 unsigned int absExponent;
121 const unsigned int overlargeExponent = 24000; /* FIXME. */
122 StringRef::iterator p = begin;
124 assert(p != end && "Exponent has no digits");
126 isNegative = (*p == '-');
127 if (*p == '-' || *p == '+') {
129 assert(p != end && "Exponent has no digits");
132 absExponent = decDigitValue(*p++);
133 assert(absExponent < 10U && "Invalid character in exponent");
135 for (; p != end; ++p) {
138 value = decDigitValue(*p);
139 assert(value < 10U && "Invalid character in exponent");
141 value += absExponent * 10;
142 if (absExponent >= overlargeExponent) {
143 absExponent = overlargeExponent;
144 p = end; /* outwit assert below */
150 assert(p == end && "Invalid exponent in exponent");
153 return -(int) absExponent;
155 return (int) absExponent;
158 /* This is ugly and needs cleaning up, but I don't immediately see
159 how whilst remaining safe. */
161 totalExponent(StringRef::iterator p, StringRef::iterator end,
162 int exponentAdjustment)
164 int unsignedExponent;
165 bool negative, overflow;
168 assert(p != end && "Exponent has no digits");
170 negative = *p == '-';
171 if (*p == '-' || *p == '+') {
173 assert(p != end && "Exponent has no digits");
176 unsignedExponent = 0;
178 for (; p != end; ++p) {
181 value = decDigitValue(*p);
182 assert(value < 10U && "Invalid character in exponent");
184 unsignedExponent = unsignedExponent * 10 + value;
185 if (unsignedExponent > 32767) {
191 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
195 exponent = unsignedExponent;
197 exponent = -exponent;
198 exponent += exponentAdjustment;
199 if (exponent > 32767 || exponent < -32768)
204 exponent = negative ? -32768: 32767;
209 static StringRef::iterator
210 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
211 StringRef::iterator *dot)
213 StringRef::iterator p = begin;
215 while (*p == '0' && p != end)
221 assert(end - begin != 1 && "Significand has no digits");
223 while (*p == '0' && p != end)
230 /* Given a normal decimal floating point number of the form
234 where the decimal point and exponent are optional, fill out the
235 structure D. Exponent is appropriate if the significand is
236 treated as an integer, and normalizedExponent if the significand
237 is taken to have the decimal point after a single leading
240 If the value is zero, V->firstSigDigit points to a non-digit, and
241 the return exponent is zero.
244 const char *firstSigDigit;
245 const char *lastSigDigit;
247 int normalizedExponent;
251 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
254 StringRef::iterator dot = end;
255 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
257 D->firstSigDigit = p;
259 D->normalizedExponent = 0;
261 for (; p != end; ++p) {
263 assert(dot == end && "String contains multiple dots");
268 if (decDigitValue(*p) >= 10U)
273 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
274 assert(p != begin && "Significand has no digits");
275 assert((dot == end || p - begin != 1) && "Significand has no digits");
277 /* p points to the first non-digit in the string */
278 D->exponent = readExponent(p + 1, end);
280 /* Implied decimal point? */
285 /* If number is all zeroes accept any exponent. */
286 if (p != D->firstSigDigit) {
287 /* Drop insignificant trailing zeroes. */
292 while (p != begin && *p == '0');
293 while (p != begin && *p == '.');
296 /* Adjust the exponents for any decimal point. */
297 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
298 D->normalizedExponent = (D->exponent +
299 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
300 - (dot > D->firstSigDigit && dot < p)));
306 /* Return the trailing fraction of a hexadecimal number.
307 DIGITVALUE is the first hex digit of the fraction, P points to
310 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
311 unsigned int digitValue)
313 unsigned int hexDigit;
315 /* If the first trailing digit isn't 0 or 8 we can work out the
316 fraction immediately. */
318 return lfMoreThanHalf;
319 else if (digitValue < 8 && digitValue > 0)
320 return lfLessThanHalf;
322 /* Otherwise we need to find the first non-zero digit. */
326 assert(p != end && "Invalid trailing hexadecimal fraction!");
328 hexDigit = hexDigitValue(*p);
330 /* If we ran off the end it is exactly zero or one-half, otherwise
333 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
335 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
338 /* Return the fraction lost were a bignum truncated losing the least
339 significant BITS bits. */
341 lostFractionThroughTruncation(const integerPart *parts,
342 unsigned int partCount,
347 lsb = APInt::tcLSB(parts, partCount);
349 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
351 return lfExactlyZero;
353 return lfExactlyHalf;
354 if (bits <= partCount * integerPartWidth &&
355 APInt::tcExtractBit(parts, bits - 1))
356 return lfMoreThanHalf;
358 return lfLessThanHalf;
361 /* Shift DST right BITS bits noting lost fraction. */
363 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
365 lostFraction lost_fraction;
367 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
369 APInt::tcShiftRight(dst, parts, bits);
371 return lost_fraction;
374 /* Combine the effect of two lost fractions. */
376 combineLostFractions(lostFraction moreSignificant,
377 lostFraction lessSignificant)
379 if (lessSignificant != lfExactlyZero) {
380 if (moreSignificant == lfExactlyZero)
381 moreSignificant = lfLessThanHalf;
382 else if (moreSignificant == lfExactlyHalf)
383 moreSignificant = lfMoreThanHalf;
386 return moreSignificant;
389 /* The error from the true value, in half-ulps, on multiplying two
390 floating point numbers, which differ from the value they
391 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
392 than the returned value.
394 See "How to Read Floating Point Numbers Accurately" by William D
397 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
399 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
401 if (HUerr1 + HUerr2 == 0)
402 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
404 return inexactMultiply + 2 * (HUerr1 + HUerr2);
407 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
408 when the least significant BITS are truncated. BITS cannot be
411 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
413 unsigned int count, partBits;
414 integerPart part, boundary;
419 count = bits / integerPartWidth;
420 partBits = bits % integerPartWidth + 1;
422 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
425 boundary = (integerPart) 1 << (partBits - 1);
430 if (part - boundary <= boundary - part)
431 return part - boundary;
433 return boundary - part;
436 if (part == boundary) {
439 return ~(integerPart) 0; /* A lot. */
442 } else if (part == boundary - 1) {
445 return ~(integerPart) 0; /* A lot. */
450 return ~(integerPart) 0; /* A lot. */
453 /* Place pow(5, power) in DST, and return the number of parts used.
454 DST must be at least one part larger than size of the answer. */
456 powerOf5(integerPart *dst, unsigned int power)
458 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
460 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
461 pow5s[0] = 78125 * 5;
463 unsigned int partsCount[16] = { 1 };
464 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
466 assert(power <= maxExponent);
471 *p1 = firstEightPowers[power & 7];
477 for (unsigned int n = 0; power; power >>= 1, n++) {
482 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
484 pc = partsCount[n - 1];
485 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
487 if (pow5[pc - 1] == 0)
495 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
497 if (p2[result - 1] == 0)
500 /* Now result is in p1 with partsCount parts and p2 is scratch
502 tmp = p1, p1 = p2, p2 = tmp;
509 APInt::tcAssign(dst, p1, result);
514 /* Zero at the end to avoid modular arithmetic when adding one; used
515 when rounding up during hexadecimal output. */
516 static const char hexDigitsLower[] = "0123456789abcdef0";
517 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
518 static const char infinityL[] = "infinity";
519 static const char infinityU[] = "INFINITY";
520 static const char NaNL[] = "nan";
521 static const char NaNU[] = "NAN";
523 /* Write out an integerPart in hexadecimal, starting with the most
524 significant nibble. Write out exactly COUNT hexdigits, return
527 partAsHex (char *dst, integerPart part, unsigned int count,
528 const char *hexDigitChars)
530 unsigned int result = count;
532 assert(count != 0 && count <= integerPartWidth / 4);
534 part >>= (integerPartWidth - 4 * count);
536 dst[count] = hexDigitChars[part & 0xf];
543 /* Write out an unsigned decimal integer. */
545 writeUnsignedDecimal (char *dst, unsigned int n)
561 /* Write out a signed decimal integer. */
563 writeSignedDecimal (char *dst, int value)
567 dst = writeUnsignedDecimal(dst, -(unsigned) value);
569 dst = writeUnsignedDecimal(dst, value);
576 APFloat::initialize(const fltSemantics *ourSemantics)
580 semantics = ourSemantics;
583 significand.parts = new integerPart[count];
587 APFloat::freeSignificand()
590 delete [] significand.parts;
594 APFloat::assign(const APFloat &rhs)
596 assert(semantics == rhs.semantics);
599 category = rhs.category;
600 exponent = rhs.exponent;
601 if (isFiniteNonZero() || category == fcNaN)
602 copySignificand(rhs);
606 APFloat::copySignificand(const APFloat &rhs)
608 assert(isFiniteNonZero() || category == fcNaN);
609 assert(rhs.partCount() >= partCount());
611 APInt::tcAssign(significandParts(), rhs.significandParts(),
615 /* Make this number a NaN, with an arbitrary but deterministic value
616 for the significand. If double or longer, this is a signalling NaN,
617 which may not be ideal. If float, this is QNaN(0). */
618 void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
623 integerPart *significand = significandParts();
624 unsigned numParts = partCount();
626 // Set the significand bits to the fill.
627 if (!fill || fill->getNumWords() < numParts)
628 APInt::tcSet(significand, 0, numParts);
630 APInt::tcAssign(significand, fill->getRawData(),
631 std::min(fill->getNumWords(), numParts));
633 // Zero out the excess bits of the significand.
634 unsigned bitsToPreserve = semantics->precision - 1;
635 unsigned part = bitsToPreserve / 64;
636 bitsToPreserve %= 64;
637 significand[part] &= ((1ULL << bitsToPreserve) - 1);
638 for (part++; part != numParts; ++part)
639 significand[part] = 0;
642 unsigned QNaNBit = semantics->precision - 2;
645 // We always have to clear the QNaN bit to make it an SNaN.
646 APInt::tcClearBit(significand, QNaNBit);
648 // If there are no bits set in the payload, we have to set
649 // *something* to make it a NaN instead of an infinity;
650 // conventionally, this is the next bit down from the QNaN bit.
651 if (APInt::tcIsZero(significand, numParts))
652 APInt::tcSetBit(significand, QNaNBit - 1);
654 // We always have to set the QNaN bit to make it a QNaN.
655 APInt::tcSetBit(significand, QNaNBit);
658 // For x87 extended precision, we want to make a NaN, not a
659 // pseudo-NaN. Maybe we should expose the ability to make
661 if (semantics == &APFloat::x87DoubleExtended)
662 APInt::tcSetBit(significand, QNaNBit + 1);
665 APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
667 APFloat value(Sem, uninitialized);
668 value.makeNaN(SNaN, Negative, fill);
673 APFloat::operator=(const APFloat &rhs)
676 if (semantics != rhs.semantics) {
678 initialize(rhs.semantics);
687 APFloat::isDenormal() const {
688 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
689 (APInt::tcExtractBit(significandParts(),
690 semantics->precision - 1) == 0);
694 APFloat::isSmallest() const {
695 // The smallest number by magnitude in our format will be the smallest
696 // denormal, i.e. the floating point number with exponent being minimum
697 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
698 return isFiniteNonZero() && exponent == semantics->minExponent &&
699 significandMSB() == 0;
702 bool APFloat::isSignificandAllOnes() const {
703 // Test if the significand excluding the integral bit is all ones. This allows
704 // us to test for binade boundaries.
705 const integerPart *Parts = significandParts();
706 const unsigned PartCount = partCount();
707 for (unsigned i = 0; i < PartCount - 1; i++)
711 // Set the unused high bits to all ones when we compare.
712 const unsigned NumHighBits =
713 PartCount*integerPartWidth - semantics->precision + 1;
714 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
715 "fill than integerPartWidth");
716 const integerPart HighBitFill =
717 ~integerPart(0) << (integerPartWidth - NumHighBits);
718 if (~(Parts[PartCount - 1] | HighBitFill))
724 bool APFloat::isSignificandAllZeros() const {
725 // Test if the significand excluding the integral bit is all zeros. This
726 // allows us to test for binade boundaries.
727 const integerPart *Parts = significandParts();
728 const unsigned PartCount = partCount();
730 for (unsigned i = 0; i < PartCount - 1; i++)
734 const unsigned NumHighBits =
735 PartCount*integerPartWidth - semantics->precision + 1;
736 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
737 "clear than integerPartWidth");
738 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
740 if (Parts[PartCount - 1] & HighBitMask)
747 APFloat::isLargest() const {
748 // The largest number by magnitude in our format will be the floating point
749 // number with maximum exponent and with significand that is all ones.
750 return isFiniteNonZero() && exponent == semantics->maxExponent
751 && isSignificandAllOnes();
755 APFloat::bitwiseIsEqual(const APFloat &rhs) const {
758 if (semantics != rhs.semantics ||
759 category != rhs.category ||
762 if (category==fcZero || category==fcInfinity)
764 else if (isFiniteNonZero() && exponent!=rhs.exponent)
768 const integerPart* p=significandParts();
769 const integerPart* q=rhs.significandParts();
770 for (; i>0; i--, p++, q++) {
778 APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
779 initialize(&ourSemantics);
782 exponent = ourSemantics.precision - 1;
783 significandParts()[0] = value;
784 normalize(rmNearestTiesToEven, lfExactlyZero);
787 APFloat::APFloat(const fltSemantics &ourSemantics) {
788 initialize(&ourSemantics);
793 APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
794 // Allocates storage if necessary but does not initialize it.
795 initialize(&ourSemantics);
798 APFloat::APFloat(const fltSemantics &ourSemantics,
799 fltCategory ourCategory, bool negative) {
800 initialize(&ourSemantics);
801 category = ourCategory;
803 if (isFiniteNonZero())
805 else if (ourCategory == fcNaN)
809 APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
810 initialize(&ourSemantics);
811 convertFromString(text, rmNearestTiesToEven);
814 APFloat::APFloat(const APFloat &rhs) {
815 initialize(rhs.semantics);
824 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
825 void APFloat::Profile(FoldingSetNodeID& ID) const {
826 ID.Add(bitcastToAPInt());
830 APFloat::partCount() const
832 return partCountForBits(semantics->precision + 1);
836 APFloat::semanticsPrecision(const fltSemantics &semantics)
838 return semantics.precision;
842 APFloat::significandParts() const
844 return const_cast<APFloat *>(this)->significandParts();
848 APFloat::significandParts()
851 return significand.parts;
853 return &significand.part;
857 APFloat::zeroSignificand()
860 APInt::tcSet(significandParts(), 0, partCount());
863 /* Increment an fcNormal floating point number's significand. */
865 APFloat::incrementSignificand()
869 carry = APInt::tcIncrement(significandParts(), partCount());
871 /* Our callers should never cause us to overflow. */
876 /* Add the significand of the RHS. Returns the carry flag. */
878 APFloat::addSignificand(const APFloat &rhs)
882 parts = significandParts();
884 assert(semantics == rhs.semantics);
885 assert(exponent == rhs.exponent);
887 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
890 /* Subtract the significand of the RHS with a borrow flag. Returns
893 APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
897 parts = significandParts();
899 assert(semantics == rhs.semantics);
900 assert(exponent == rhs.exponent);
902 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
906 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
907 on to the full-precision result of the multiplication. Returns the
910 APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
912 unsigned int omsb; // One, not zero, based MSB.
913 unsigned int partsCount, newPartsCount, precision;
914 integerPart *lhsSignificand;
915 integerPart scratch[4];
916 integerPart *fullSignificand;
917 lostFraction lost_fraction;
920 assert(semantics == rhs.semantics);
922 precision = semantics->precision;
923 newPartsCount = partCountForBits(precision * 2);
925 if (newPartsCount > 4)
926 fullSignificand = new integerPart[newPartsCount];
928 fullSignificand = scratch;
930 lhsSignificand = significandParts();
931 partsCount = partCount();
933 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
934 rhs.significandParts(), partsCount, partsCount);
936 lost_fraction = lfExactlyZero;
937 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
938 exponent += rhs.exponent;
940 // Assume the operands involved in the multiplication are single-precision
941 // FP, and the two multiplicants are:
942 // *this = a23 . a22 ... a0 * 2^e1
943 // rhs = b23 . b22 ... b0 * 2^e2
944 // the result of multiplication is:
945 // *this = c47 c46 . c45 ... c0 * 2^(e1+e2)
946 // Note that there are two significant bits at the left-hand side of the
947 // radix point. Move the radix point toward left by one bit, and adjust
948 // exponent accordingly.
952 // The intermediate result of the multiplication has "2 * precision"
953 // signicant bit; adjust the addend to be consistent with mul result.
955 Significand savedSignificand = significand;
956 const fltSemantics *savedSemantics = semantics;
957 fltSemantics extendedSemantics;
959 unsigned int extendedPrecision;
961 /* Normalize our MSB. */
962 extendedPrecision = 2 * precision;
963 if (omsb != extendedPrecision) {
964 assert(extendedPrecision > omsb);
965 APInt::tcShiftLeft(fullSignificand, newPartsCount,
966 extendedPrecision - omsb);
967 exponent -= extendedPrecision - omsb;
970 /* Create new semantics. */
971 extendedSemantics = *semantics;
972 extendedSemantics.precision = extendedPrecision;
974 if (newPartsCount == 1)
975 significand.part = fullSignificand[0];
977 significand.parts = fullSignificand;
978 semantics = &extendedSemantics;
980 APFloat extendedAddend(*addend);
981 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
982 assert(status == opOK);
984 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
986 /* Restore our state. */
987 if (newPartsCount == 1)
988 fullSignificand[0] = significand.part;
989 significand = savedSignificand;
990 semantics = savedSemantics;
992 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
995 // Convert the result having "2 * precision" significant-bits back to the one
996 // having "precision" significant-bits. First, move the radix point from
997 // poision "2*precision - 1" to "precision - 1". The exponent need to be
998 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
999 exponent -= precision;
1001 // In case MSB resides at the left-hand side of radix point, shift the
1002 // mantissa right by some amount to make sure the MSB reside right before
1003 // the radix point (i.e. "MSB . rest-significant-bits").
1005 // Note that the result is not normalized when "omsb < precision". So, the
1006 // caller needs to call APFloat::normalize() if normalized value is expected.
1007 if (omsb > precision) {
1008 unsigned int bits, significantParts;
1011 bits = omsb - precision;
1012 significantParts = partCountForBits(omsb);
1013 lf = shiftRight(fullSignificand, significantParts, bits);
1014 lost_fraction = combineLostFractions(lf, lost_fraction);
1018 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1020 if (newPartsCount > 4)
1021 delete [] fullSignificand;
1023 return lost_fraction;
1026 /* Multiply the significands of LHS and RHS to DST. */
1028 APFloat::divideSignificand(const APFloat &rhs)
1030 unsigned int bit, i, partsCount;
1031 const integerPart *rhsSignificand;
1032 integerPart *lhsSignificand, *dividend, *divisor;
1033 integerPart scratch[4];
1034 lostFraction lost_fraction;
1036 assert(semantics == rhs.semantics);
1038 lhsSignificand = significandParts();
1039 rhsSignificand = rhs.significandParts();
1040 partsCount = partCount();
1043 dividend = new integerPart[partsCount * 2];
1047 divisor = dividend + partsCount;
1049 /* Copy the dividend and divisor as they will be modified in-place. */
1050 for (i = 0; i < partsCount; i++) {
1051 dividend[i] = lhsSignificand[i];
1052 divisor[i] = rhsSignificand[i];
1053 lhsSignificand[i] = 0;
1056 exponent -= rhs.exponent;
1058 unsigned int precision = semantics->precision;
1060 /* Normalize the divisor. */
1061 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1064 APInt::tcShiftLeft(divisor, partsCount, bit);
1067 /* Normalize the dividend. */
1068 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1071 APInt::tcShiftLeft(dividend, partsCount, bit);
1074 /* Ensure the dividend >= divisor initially for the loop below.
1075 Incidentally, this means that the division loop below is
1076 guaranteed to set the integer bit to one. */
1077 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1079 APInt::tcShiftLeft(dividend, partsCount, 1);
1080 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1083 /* Long division. */
1084 for (bit = precision; bit; bit -= 1) {
1085 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1086 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1087 APInt::tcSetBit(lhsSignificand, bit - 1);
1090 APInt::tcShiftLeft(dividend, partsCount, 1);
1093 /* Figure out the lost fraction. */
1094 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1097 lost_fraction = lfMoreThanHalf;
1099 lost_fraction = lfExactlyHalf;
1100 else if (APInt::tcIsZero(dividend, partsCount))
1101 lost_fraction = lfExactlyZero;
1103 lost_fraction = lfLessThanHalf;
1108 return lost_fraction;
1112 APFloat::significandMSB() const
1114 return APInt::tcMSB(significandParts(), partCount());
1118 APFloat::significandLSB() const
1120 return APInt::tcLSB(significandParts(), partCount());
1123 /* Note that a zero result is NOT normalized to fcZero. */
1125 APFloat::shiftSignificandRight(unsigned int bits)
1127 /* Our exponent should not overflow. */
1128 assert((ExponentType) (exponent + bits) >= exponent);
1132 return shiftRight(significandParts(), partCount(), bits);
1135 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1137 APFloat::shiftSignificandLeft(unsigned int bits)
1139 assert(bits < semantics->precision);
1142 unsigned int partsCount = partCount();
1144 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1147 assert(!APInt::tcIsZero(significandParts(), partsCount));
1152 APFloat::compareAbsoluteValue(const APFloat &rhs) const
1156 assert(semantics == rhs.semantics);
1157 assert(isFiniteNonZero());
1158 assert(rhs.isFiniteNonZero());
1160 compare = exponent - rhs.exponent;
1162 /* If exponents are equal, do an unsigned bignum comparison of the
1165 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1169 return cmpGreaterThan;
1170 else if (compare < 0)
1176 /* Handle overflow. Sign is preserved. We either become infinity or
1177 the largest finite number. */
1179 APFloat::handleOverflow(roundingMode rounding_mode)
1182 if (rounding_mode == rmNearestTiesToEven ||
1183 rounding_mode == rmNearestTiesToAway ||
1184 (rounding_mode == rmTowardPositive && !sign) ||
1185 (rounding_mode == rmTowardNegative && sign)) {
1186 category = fcInfinity;
1187 return (opStatus) (opOverflow | opInexact);
1190 /* Otherwise we become the largest finite number. */
1191 category = fcNormal;
1192 exponent = semantics->maxExponent;
1193 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1194 semantics->precision);
1199 /* Returns TRUE if, when truncating the current number, with BIT the
1200 new LSB, with the given lost fraction and rounding mode, the result
1201 would need to be rounded away from zero (i.e., by increasing the
1202 signficand). This routine must work for fcZero of both signs, and
1203 fcNormal numbers. */
1205 APFloat::roundAwayFromZero(roundingMode rounding_mode,
1206 lostFraction lost_fraction,
1207 unsigned int bit) const
1209 /* NaNs and infinities should not have lost fractions. */
1210 assert(isFiniteNonZero() || category == fcZero);
1212 /* Current callers never pass this so we don't handle it. */
1213 assert(lost_fraction != lfExactlyZero);
1215 switch (rounding_mode) {
1216 case rmNearestTiesToAway:
1217 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1219 case rmNearestTiesToEven:
1220 if (lost_fraction == lfMoreThanHalf)
1223 /* Our zeroes don't have a significand to test. */
1224 if (lost_fraction == lfExactlyHalf && category != fcZero)
1225 return APInt::tcExtractBit(significandParts(), bit);
1232 case rmTowardPositive:
1233 return sign == false;
1235 case rmTowardNegative:
1236 return sign == true;
1238 llvm_unreachable("Invalid rounding mode found");
1242 APFloat::normalize(roundingMode rounding_mode,
1243 lostFraction lost_fraction)
1245 unsigned int omsb; /* One, not zero, based MSB. */
1248 if (!isFiniteNonZero())
1251 /* Before rounding normalize the exponent of fcNormal numbers. */
1252 omsb = significandMSB() + 1;
1255 /* OMSB is numbered from 1. We want to place it in the integer
1256 bit numbered PRECISION if possible, with a compensating change in
1258 exponentChange = omsb - semantics->precision;
1260 /* If the resulting exponent is too high, overflow according to
1261 the rounding mode. */
1262 if (exponent + exponentChange > semantics->maxExponent)
1263 return handleOverflow(rounding_mode);
1265 /* Subnormal numbers have exponent minExponent, and their MSB
1266 is forced based on that. */
1267 if (exponent + exponentChange < semantics->minExponent)
1268 exponentChange = semantics->minExponent - exponent;
1270 /* Shifting left is easy as we don't lose precision. */
1271 if (exponentChange < 0) {
1272 assert(lost_fraction == lfExactlyZero);
1274 shiftSignificandLeft(-exponentChange);
1279 if (exponentChange > 0) {
1282 /* Shift right and capture any new lost fraction. */
1283 lf = shiftSignificandRight(exponentChange);
1285 lost_fraction = combineLostFractions(lf, lost_fraction);
1287 /* Keep OMSB up-to-date. */
1288 if (omsb > (unsigned) exponentChange)
1289 omsb -= exponentChange;
1295 /* Now round the number according to rounding_mode given the lost
1298 /* As specified in IEEE 754, since we do not trap we do not report
1299 underflow for exact results. */
1300 if (lost_fraction == lfExactlyZero) {
1301 /* Canonicalize zeroes. */
1308 /* Increment the significand if we're rounding away from zero. */
1309 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1311 exponent = semantics->minExponent;
1313 incrementSignificand();
1314 omsb = significandMSB() + 1;
1316 /* Did the significand increment overflow? */
1317 if (omsb == (unsigned) semantics->precision + 1) {
1318 /* Renormalize by incrementing the exponent and shifting our
1319 significand right one. However if we already have the
1320 maximum exponent we overflow to infinity. */
1321 if (exponent == semantics->maxExponent) {
1322 category = fcInfinity;
1324 return (opStatus) (opOverflow | opInexact);
1327 shiftSignificandRight(1);
1333 /* The normal case - we were and are not denormal, and any
1334 significand increment above didn't overflow. */
1335 if (omsb == semantics->precision)
1338 /* We have a non-zero denormal. */
1339 assert(omsb < semantics->precision);
1341 /* Canonicalize zeroes. */
1345 /* The fcZero case is a denormal that underflowed to zero. */
1346 return (opStatus) (opUnderflow | opInexact);
1350 APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
1352 switch (PackCategoriesIntoKey(category, rhs.category)) {
1354 llvm_unreachable(0);
1356 case PackCategoriesIntoKey(fcNaN, fcZero):
1357 case PackCategoriesIntoKey(fcNaN, fcNormal):
1358 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1359 case PackCategoriesIntoKey(fcNaN, fcNaN):
1360 case PackCategoriesIntoKey(fcNormal, fcZero):
1361 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1362 case PackCategoriesIntoKey(fcInfinity, fcZero):
1365 case PackCategoriesIntoKey(fcZero, fcNaN):
1366 case PackCategoriesIntoKey(fcNormal, fcNaN):
1367 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1369 copySignificand(rhs);
1372 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1373 case PackCategoriesIntoKey(fcZero, fcInfinity):
1374 category = fcInfinity;
1375 sign = rhs.sign ^ subtract;
1378 case PackCategoriesIntoKey(fcZero, fcNormal):
1380 sign = rhs.sign ^ subtract;
1383 case PackCategoriesIntoKey(fcZero, fcZero):
1384 /* Sign depends on rounding mode; handled by caller. */
1387 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1388 /* Differently signed infinities can only be validly
1390 if (((sign ^ rhs.sign)!=0) != subtract) {
1397 case PackCategoriesIntoKey(fcNormal, fcNormal):
1402 /* Add or subtract two normal numbers. */
1404 APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
1407 lostFraction lost_fraction;
1410 /* Determine if the operation on the absolute values is effectively
1411 an addition or subtraction. */
1412 subtract ^= (sign ^ rhs.sign) ? true : false;
1414 /* Are we bigger exponent-wise than the RHS? */
1415 bits = exponent - rhs.exponent;
1417 /* Subtraction is more subtle than one might naively expect. */
1419 APFloat temp_rhs(rhs);
1423 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1424 lost_fraction = lfExactlyZero;
1425 } else if (bits > 0) {
1426 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1427 shiftSignificandLeft(1);
1430 lost_fraction = shiftSignificandRight(-bits - 1);
1431 temp_rhs.shiftSignificandLeft(1);
1436 carry = temp_rhs.subtractSignificand
1437 (*this, lost_fraction != lfExactlyZero);
1438 copySignificand(temp_rhs);
1441 carry = subtractSignificand
1442 (temp_rhs, lost_fraction != lfExactlyZero);
1445 /* Invert the lost fraction - it was on the RHS and
1447 if (lost_fraction == lfLessThanHalf)
1448 lost_fraction = lfMoreThanHalf;
1449 else if (lost_fraction == lfMoreThanHalf)
1450 lost_fraction = lfLessThanHalf;
1452 /* The code above is intended to ensure that no borrow is
1458 APFloat temp_rhs(rhs);
1460 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1461 carry = addSignificand(temp_rhs);
1463 lost_fraction = shiftSignificandRight(-bits);
1464 carry = addSignificand(rhs);
1467 /* We have a guard bit; generating a carry cannot happen. */
1472 return lost_fraction;
1476 APFloat::multiplySpecials(const APFloat &rhs)
1478 switch (PackCategoriesIntoKey(category, rhs.category)) {
1480 llvm_unreachable(0);
1482 case PackCategoriesIntoKey(fcNaN, fcZero):
1483 case PackCategoriesIntoKey(fcNaN, fcNormal):
1484 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1485 case PackCategoriesIntoKey(fcNaN, fcNaN):
1488 case PackCategoriesIntoKey(fcZero, fcNaN):
1489 case PackCategoriesIntoKey(fcNormal, fcNaN):
1490 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1492 copySignificand(rhs);
1495 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1496 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1497 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1498 category = fcInfinity;
1501 case PackCategoriesIntoKey(fcZero, fcNormal):
1502 case PackCategoriesIntoKey(fcNormal, fcZero):
1503 case PackCategoriesIntoKey(fcZero, fcZero):
1507 case PackCategoriesIntoKey(fcZero, fcInfinity):
1508 case PackCategoriesIntoKey(fcInfinity, fcZero):
1512 case PackCategoriesIntoKey(fcNormal, fcNormal):
1518 APFloat::divideSpecials(const APFloat &rhs)
1520 switch (PackCategoriesIntoKey(category, rhs.category)) {
1522 llvm_unreachable(0);
1524 case PackCategoriesIntoKey(fcNaN, fcZero):
1525 case PackCategoriesIntoKey(fcNaN, fcNormal):
1526 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1527 case PackCategoriesIntoKey(fcNaN, fcNaN):
1528 case PackCategoriesIntoKey(fcInfinity, fcZero):
1529 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1530 case PackCategoriesIntoKey(fcZero, fcInfinity):
1531 case PackCategoriesIntoKey(fcZero, fcNormal):
1534 case PackCategoriesIntoKey(fcZero, fcNaN):
1535 case PackCategoriesIntoKey(fcNormal, fcNaN):
1536 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1538 copySignificand(rhs);
1541 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1545 case PackCategoriesIntoKey(fcNormal, fcZero):
1546 category = fcInfinity;
1549 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1550 case PackCategoriesIntoKey(fcZero, fcZero):
1554 case PackCategoriesIntoKey(fcNormal, fcNormal):
1560 APFloat::modSpecials(const APFloat &rhs)
1562 switch (PackCategoriesIntoKey(category, rhs.category)) {
1564 llvm_unreachable(0);
1566 case PackCategoriesIntoKey(fcNaN, fcZero):
1567 case PackCategoriesIntoKey(fcNaN, fcNormal):
1568 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1569 case PackCategoriesIntoKey(fcNaN, fcNaN):
1570 case PackCategoriesIntoKey(fcZero, fcInfinity):
1571 case PackCategoriesIntoKey(fcZero, fcNormal):
1572 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1575 case PackCategoriesIntoKey(fcZero, fcNaN):
1576 case PackCategoriesIntoKey(fcNormal, fcNaN):
1577 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1579 copySignificand(rhs);
1582 case PackCategoriesIntoKey(fcNormal, fcZero):
1583 case PackCategoriesIntoKey(fcInfinity, fcZero):
1584 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1585 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1586 case PackCategoriesIntoKey(fcZero, fcZero):
1590 case PackCategoriesIntoKey(fcNormal, fcNormal):
1597 APFloat::changeSign()
1599 /* Look mummy, this one's easy. */
1604 APFloat::clearSign()
1606 /* So is this one. */
1611 APFloat::copySign(const APFloat &rhs)
1617 /* Normalized addition or subtraction. */
1619 APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
1624 fs = addOrSubtractSpecials(rhs, subtract);
1626 /* This return code means it was not a simple case. */
1627 if (fs == opDivByZero) {
1628 lostFraction lost_fraction;
1630 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1631 fs = normalize(rounding_mode, lost_fraction);
1633 /* Can only be zero if we lost no fraction. */
1634 assert(category != fcZero || lost_fraction == lfExactlyZero);
1637 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1638 positive zero unless rounding to minus infinity, except that
1639 adding two like-signed zeroes gives that zero. */
1640 if (category == fcZero) {
1641 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1642 sign = (rounding_mode == rmTowardNegative);
1648 /* Normalized addition. */
1650 APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
1652 return addOrSubtract(rhs, rounding_mode, false);
1655 /* Normalized subtraction. */
1657 APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
1659 return addOrSubtract(rhs, rounding_mode, true);
1662 /* Normalized multiply. */
1664 APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
1669 fs = multiplySpecials(rhs);
1671 if (isFiniteNonZero()) {
1672 lostFraction lost_fraction = multiplySignificand(rhs, 0);
1673 fs = normalize(rounding_mode, lost_fraction);
1674 if (lost_fraction != lfExactlyZero)
1675 fs = (opStatus) (fs | opInexact);
1681 /* Normalized divide. */
1683 APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
1688 fs = divideSpecials(rhs);
1690 if (isFiniteNonZero()) {
1691 lostFraction lost_fraction = divideSignificand(rhs);
1692 fs = normalize(rounding_mode, lost_fraction);
1693 if (lost_fraction != lfExactlyZero)
1694 fs = (opStatus) (fs | opInexact);
1700 /* Normalized remainder. This is not currently correct in all cases. */
1702 APFloat::remainder(const APFloat &rhs)
1706 unsigned int origSign = sign;
1708 fs = V.divide(rhs, rmNearestTiesToEven);
1709 if (fs == opDivByZero)
1712 int parts = partCount();
1713 integerPart *x = new integerPart[parts];
1715 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1716 rmNearestTiesToEven, &ignored);
1717 if (fs==opInvalidOp)
1720 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1721 rmNearestTiesToEven);
1722 assert(fs==opOK); // should always work
1724 fs = V.multiply(rhs, rmNearestTiesToEven);
1725 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1727 fs = subtract(V, rmNearestTiesToEven);
1728 assert(fs==opOK || fs==opInexact); // likewise
1731 sign = origSign; // IEEE754 requires this
1736 /* Normalized llvm frem (C fmod).
1737 This is not currently correct in all cases. */
1739 APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
1742 fs = modSpecials(rhs);
1744 if (isFiniteNonZero() && rhs.isFiniteNonZero()) {
1746 unsigned int origSign = sign;
1748 fs = V.divide(rhs, rmNearestTiesToEven);
1749 if (fs == opDivByZero)
1752 int parts = partCount();
1753 integerPart *x = new integerPart[parts];
1755 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1756 rmTowardZero, &ignored);
1757 if (fs==opInvalidOp)
1760 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1761 rmNearestTiesToEven);
1762 assert(fs==opOK); // should always work
1764 fs = V.multiply(rhs, rounding_mode);
1765 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1767 fs = subtract(V, rounding_mode);
1768 assert(fs==opOK || fs==opInexact); // likewise
1771 sign = origSign; // IEEE754 requires this
1777 /* Normalized fused-multiply-add. */
1779 APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
1780 const APFloat &addend,
1781 roundingMode rounding_mode)
1785 /* Post-multiplication sign, before addition. */
1786 sign ^= multiplicand.sign;
1788 /* If and only if all arguments are normal do we need to do an
1789 extended-precision calculation. */
1790 if (isFiniteNonZero() &&
1791 multiplicand.isFiniteNonZero() &&
1792 addend.isFiniteNonZero()) {
1793 lostFraction lost_fraction;
1795 lost_fraction = multiplySignificand(multiplicand, &addend);
1796 fs = normalize(rounding_mode, lost_fraction);
1797 if (lost_fraction != lfExactlyZero)
1798 fs = (opStatus) (fs | opInexact);
1800 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1801 positive zero unless rounding to minus infinity, except that
1802 adding two like-signed zeroes gives that zero. */
1803 if (category == fcZero && sign != addend.sign)
1804 sign = (rounding_mode == rmTowardNegative);
1806 fs = multiplySpecials(multiplicand);
1808 /* FS can only be opOK or opInvalidOp. There is no more work
1809 to do in the latter case. The IEEE-754R standard says it is
1810 implementation-defined in this case whether, if ADDEND is a
1811 quiet NaN, we raise invalid op; this implementation does so.
1813 If we need to do the addition we can do so with normal
1816 fs = addOrSubtract(addend, rounding_mode, false);
1822 /* Rounding-mode corrrect round to integral value. */
1823 APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
1826 // If the exponent is large enough, we know that this value is already
1827 // integral, and the arithmetic below would potentially cause it to saturate
1828 // to +/-Inf. Bail out early instead.
1829 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1832 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1833 // precision of our format, and then subtract it back off again. The choice
1834 // of rounding modes for the addition/subtraction determines the rounding mode
1835 // for our integral rounding as well.
1836 // NOTE: When the input value is negative, we do subtraction followed by
1837 // addition instead.
1838 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1839 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1840 APFloat MagicConstant(*semantics);
1841 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1842 rmNearestTiesToEven);
1843 MagicConstant.copySign(*this);
1848 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1849 bool inputSign = isNegative();
1851 fs = add(MagicConstant, rounding_mode);
1852 if (fs != opOK && fs != opInexact)
1855 fs = subtract(MagicConstant, rounding_mode);
1857 // Restore the input sign.
1858 if (inputSign != isNegative())
1865 /* Comparison requires normalized numbers. */
1867 APFloat::compare(const APFloat &rhs) const
1871 assert(semantics == rhs.semantics);
1873 switch (PackCategoriesIntoKey(category, rhs.category)) {
1875 llvm_unreachable(0);
1877 case PackCategoriesIntoKey(fcNaN, fcZero):
1878 case PackCategoriesIntoKey(fcNaN, fcNormal):
1879 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1880 case PackCategoriesIntoKey(fcNaN, fcNaN):
1881 case PackCategoriesIntoKey(fcZero, fcNaN):
1882 case PackCategoriesIntoKey(fcNormal, fcNaN):
1883 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1884 return cmpUnordered;
1886 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1887 case PackCategoriesIntoKey(fcInfinity, fcZero):
1888 case PackCategoriesIntoKey(fcNormal, fcZero):
1892 return cmpGreaterThan;
1894 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1895 case PackCategoriesIntoKey(fcZero, fcInfinity):
1896 case PackCategoriesIntoKey(fcZero, fcNormal):
1898 return cmpGreaterThan;
1902 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1903 if (sign == rhs.sign)
1908 return cmpGreaterThan;
1910 case PackCategoriesIntoKey(fcZero, fcZero):
1913 case PackCategoriesIntoKey(fcNormal, fcNormal):
1917 /* Two normal numbers. Do they have the same sign? */
1918 if (sign != rhs.sign) {
1920 result = cmpLessThan;
1922 result = cmpGreaterThan;
1924 /* Compare absolute values; invert result if negative. */
1925 result = compareAbsoluteValue(rhs);
1928 if (result == cmpLessThan)
1929 result = cmpGreaterThan;
1930 else if (result == cmpGreaterThan)
1931 result = cmpLessThan;
1938 /// APFloat::convert - convert a value of one floating point type to another.
1939 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1940 /// records whether the transformation lost information, i.e. whether
1941 /// converting the result back to the original type will produce the
1942 /// original value (this is almost the same as return value==fsOK, but there
1943 /// are edge cases where this is not so).
1946 APFloat::convert(const fltSemantics &toSemantics,
1947 roundingMode rounding_mode, bool *losesInfo)
1949 lostFraction lostFraction;
1950 unsigned int newPartCount, oldPartCount;
1953 const fltSemantics &fromSemantics = *semantics;
1955 lostFraction = lfExactlyZero;
1956 newPartCount = partCountForBits(toSemantics.precision + 1);
1957 oldPartCount = partCount();
1958 shift = toSemantics.precision - fromSemantics.precision;
1960 bool X86SpecialNan = false;
1961 if (&fromSemantics == &APFloat::x87DoubleExtended &&
1962 &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
1963 (!(*significandParts() & 0x8000000000000000ULL) ||
1964 !(*significandParts() & 0x4000000000000000ULL))) {
1965 // x86 has some unusual NaNs which cannot be represented in any other
1966 // format; note them here.
1967 X86SpecialNan = true;
1970 // If this is a truncation, perform the shift before we narrow the storage.
1971 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
1972 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1974 // Fix the storage so it can hold to new value.
1975 if (newPartCount > oldPartCount) {
1976 // The new type requires more storage; make it available.
1977 integerPart *newParts;
1978 newParts = new integerPart[newPartCount];
1979 APInt::tcSet(newParts, 0, newPartCount);
1980 if (isFiniteNonZero() || category==fcNaN)
1981 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1983 significand.parts = newParts;
1984 } else if (newPartCount == 1 && oldPartCount != 1) {
1985 // Switch to built-in storage for a single part.
1986 integerPart newPart = 0;
1987 if (isFiniteNonZero() || category==fcNaN)
1988 newPart = significandParts()[0];
1990 significand.part = newPart;
1993 // Now that we have the right storage, switch the semantics.
1994 semantics = &toSemantics;
1996 // If this is an extension, perform the shift now that the storage is
1998 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
1999 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2001 if (isFiniteNonZero()) {
2002 fs = normalize(rounding_mode, lostFraction);
2003 *losesInfo = (fs != opOK);
2004 } else if (category == fcNaN) {
2005 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2007 // For x87 extended precision, we want to make a NaN, not a special NaN if
2008 // the input wasn't special either.
2009 if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
2010 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2012 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2013 // does not give you back the same bits. This is dubious, and we
2014 // don't currently do it. You're really supposed to get
2015 // an invalid operation signal at runtime, but nobody does that.
2025 /* Convert a floating point number to an integer according to the
2026 rounding mode. If the rounded integer value is out of range this
2027 returns an invalid operation exception and the contents of the
2028 destination parts are unspecified. If the rounded value is in
2029 range but the floating point number is not the exact integer, the C
2030 standard doesn't require an inexact exception to be raised. IEEE
2031 854 does require it so we do that.
2033 Note that for conversions to integer type the C standard requires
2034 round-to-zero to always be used. */
2036 APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
2038 roundingMode rounding_mode,
2039 bool *isExact) const
2041 lostFraction lost_fraction;
2042 const integerPart *src;
2043 unsigned int dstPartsCount, truncatedBits;
2047 /* Handle the three special cases first. */
2048 if (category == fcInfinity || category == fcNaN)
2051 dstPartsCount = partCountForBits(width);
2053 if (category == fcZero) {
2054 APInt::tcSet(parts, 0, dstPartsCount);
2055 // Negative zero can't be represented as an int.
2060 src = significandParts();
2062 /* Step 1: place our absolute value, with any fraction truncated, in
2065 /* Our absolute value is less than one; truncate everything. */
2066 APInt::tcSet(parts, 0, dstPartsCount);
2067 /* For exponent -1 the integer bit represents .5, look at that.
2068 For smaller exponents leftmost truncated bit is 0. */
2069 truncatedBits = semantics->precision -1U - exponent;
2071 /* We want the most significant (exponent + 1) bits; the rest are
2073 unsigned int bits = exponent + 1U;
2075 /* Hopelessly large in magnitude? */
2079 if (bits < semantics->precision) {
2080 /* We truncate (semantics->precision - bits) bits. */
2081 truncatedBits = semantics->precision - bits;
2082 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2084 /* We want at least as many bits as are available. */
2085 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2086 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2091 /* Step 2: work out any lost fraction, and increment the absolute
2092 value if we would round away from zero. */
2093 if (truncatedBits) {
2094 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2096 if (lost_fraction != lfExactlyZero &&
2097 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2098 if (APInt::tcIncrement(parts, dstPartsCount))
2099 return opInvalidOp; /* Overflow. */
2102 lost_fraction = lfExactlyZero;
2105 /* Step 3: check if we fit in the destination. */
2106 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2110 /* Negative numbers cannot be represented as unsigned. */
2114 /* It takes omsb bits to represent the unsigned integer value.
2115 We lose a bit for the sign, but care is needed as the
2116 maximally negative integer is a special case. */
2117 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2120 /* This case can happen because of rounding. */
2125 APInt::tcNegate (parts, dstPartsCount);
2127 if (omsb >= width + !isSigned)
2131 if (lost_fraction == lfExactlyZero) {
2138 /* Same as convertToSignExtendedInteger, except we provide
2139 deterministic values in case of an invalid operation exception,
2140 namely zero for NaNs and the minimal or maximal value respectively
2141 for underflow or overflow.
2142 The *isExact output tells whether the result is exact, in the sense
2143 that converting it back to the original floating point type produces
2144 the original value. This is almost equivalent to result==opOK,
2145 except for negative zeroes.
2148 APFloat::convertToInteger(integerPart *parts, unsigned int width,
2150 roundingMode rounding_mode, bool *isExact) const
2154 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2157 if (fs == opInvalidOp) {
2158 unsigned int bits, dstPartsCount;
2160 dstPartsCount = partCountForBits(width);
2162 if (category == fcNaN)
2167 bits = width - isSigned;
2169 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2170 if (sign && isSigned)
2171 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2177 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2178 an APSInt, whose initial bit-width and signed-ness are used to determine the
2179 precision of the conversion.
2182 APFloat::convertToInteger(APSInt &result,
2183 roundingMode rounding_mode, bool *isExact) const
2185 unsigned bitWidth = result.getBitWidth();
2186 SmallVector<uint64_t, 4> parts(result.getNumWords());
2187 opStatus status = convertToInteger(
2188 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2189 // Keeps the original signed-ness.
2190 result = APInt(bitWidth, parts);
2194 /* Convert an unsigned integer SRC to a floating point number,
2195 rounding according to ROUNDING_MODE. The sign of the floating
2196 point number is not modified. */
2198 APFloat::convertFromUnsignedParts(const integerPart *src,
2199 unsigned int srcCount,
2200 roundingMode rounding_mode)
2202 unsigned int omsb, precision, dstCount;
2204 lostFraction lost_fraction;
2206 category = fcNormal;
2207 omsb = APInt::tcMSB(src, srcCount) + 1;
2208 dst = significandParts();
2209 dstCount = partCount();
2210 precision = semantics->precision;
2212 /* We want the most significant PRECISION bits of SRC. There may not
2213 be that many; extract what we can. */
2214 if (precision <= omsb) {
2215 exponent = omsb - 1;
2216 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2218 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2220 exponent = precision - 1;
2221 lost_fraction = lfExactlyZero;
2222 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2225 return normalize(rounding_mode, lost_fraction);
2229 APFloat::convertFromAPInt(const APInt &Val,
2231 roundingMode rounding_mode)
2233 unsigned int partCount = Val.getNumWords();
2237 if (isSigned && api.isNegative()) {
2242 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2245 /* Convert a two's complement integer SRC to a floating point number,
2246 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2247 integer is signed, in which case it must be sign-extended. */
2249 APFloat::convertFromSignExtendedInteger(const integerPart *src,
2250 unsigned int srcCount,
2252 roundingMode rounding_mode)
2257 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2260 /* If we're signed and negative negate a copy. */
2262 copy = new integerPart[srcCount];
2263 APInt::tcAssign(copy, src, srcCount);
2264 APInt::tcNegate(copy, srcCount);
2265 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2269 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2275 /* FIXME: should this just take a const APInt reference? */
2277 APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2278 unsigned int width, bool isSigned,
2279 roundingMode rounding_mode)
2281 unsigned int partCount = partCountForBits(width);
2282 APInt api = APInt(width, makeArrayRef(parts, partCount));
2285 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2290 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2294 APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
2296 lostFraction lost_fraction = lfExactlyZero;
2297 integerPart *significand;
2298 unsigned int bitPos, partsCount;
2299 StringRef::iterator dot, firstSignificantDigit;
2303 category = fcNormal;
2305 significand = significandParts();
2306 partsCount = partCount();
2307 bitPos = partsCount * integerPartWidth;
2309 /* Skip leading zeroes and any (hexa)decimal point. */
2310 StringRef::iterator begin = s.begin();
2311 StringRef::iterator end = s.end();
2312 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2313 firstSignificantDigit = p;
2316 integerPart hex_value;
2319 assert(dot == end && "String contains multiple dots");
2326 hex_value = hexDigitValue(*p);
2327 if (hex_value == -1U) {
2336 /* Store the number whilst 4-bit nibbles remain. */
2339 hex_value <<= bitPos % integerPartWidth;
2340 significand[bitPos / integerPartWidth] |= hex_value;
2342 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2343 while (p != end && hexDigitValue(*p) != -1U)
2350 /* Hex floats require an exponent but not a hexadecimal point. */
2351 assert(p != end && "Hex strings require an exponent");
2352 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2353 assert(p != begin && "Significand has no digits");
2354 assert((dot == end || p - begin != 1) && "Significand has no digits");
2356 /* Ignore the exponent if we are zero. */
2357 if (p != firstSignificantDigit) {
2360 /* Implicit hexadecimal point? */
2364 /* Calculate the exponent adjustment implicit in the number of
2365 significant digits. */
2366 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2367 if (expAdjustment < 0)
2369 expAdjustment = expAdjustment * 4 - 1;
2371 /* Adjust for writing the significand starting at the most
2372 significant nibble. */
2373 expAdjustment += semantics->precision;
2374 expAdjustment -= partsCount * integerPartWidth;
2376 /* Adjust for the given exponent. */
2377 exponent = totalExponent(p + 1, end, expAdjustment);
2380 return normalize(rounding_mode, lost_fraction);
2384 APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2385 unsigned sigPartCount, int exp,
2386 roundingMode rounding_mode)
2388 unsigned int parts, pow5PartCount;
2389 fltSemantics calcSemantics = { 32767, -32767, 0 };
2390 integerPart pow5Parts[maxPowerOfFiveParts];
2393 isNearest = (rounding_mode == rmNearestTiesToEven ||
2394 rounding_mode == rmNearestTiesToAway);
2396 parts = partCountForBits(semantics->precision + 11);
2398 /* Calculate pow(5, abs(exp)). */
2399 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2401 for (;; parts *= 2) {
2402 opStatus sigStatus, powStatus;
2403 unsigned int excessPrecision, truncatedBits;
2405 calcSemantics.precision = parts * integerPartWidth - 1;
2406 excessPrecision = calcSemantics.precision - semantics->precision;
2407 truncatedBits = excessPrecision;
2409 APFloat decSig(calcSemantics, fcZero, sign);
2410 APFloat pow5(calcSemantics, fcZero, false);
2412 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2413 rmNearestTiesToEven);
2414 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2415 rmNearestTiesToEven);
2416 /* Add exp, as 10^n = 5^n * 2^n. */
2417 decSig.exponent += exp;
2419 lostFraction calcLostFraction;
2420 integerPart HUerr, HUdistance;
2421 unsigned int powHUerr;
2424 /* multiplySignificand leaves the precision-th bit set to 1. */
2425 calcLostFraction = decSig.multiplySignificand(pow5, NULL);
2426 powHUerr = powStatus != opOK;
2428 calcLostFraction = decSig.divideSignificand(pow5);
2429 /* Denormal numbers have less precision. */
2430 if (decSig.exponent < semantics->minExponent) {
2431 excessPrecision += (semantics->minExponent - decSig.exponent);
2432 truncatedBits = excessPrecision;
2433 if (excessPrecision > calcSemantics.precision)
2434 excessPrecision = calcSemantics.precision;
2436 /* Extra half-ulp lost in reciprocal of exponent. */
2437 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2440 /* Both multiplySignificand and divideSignificand return the
2441 result with the integer bit set. */
2442 assert(APInt::tcExtractBit
2443 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2445 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2447 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2448 excessPrecision, isNearest);
2450 /* Are we guaranteed to round correctly if we truncate? */
2451 if (HUdistance >= HUerr) {
2452 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2453 calcSemantics.precision - excessPrecision,
2455 /* Take the exponent of decSig. If we tcExtract-ed less bits
2456 above we must adjust our exponent to compensate for the
2457 implicit right shift. */
2458 exponent = (decSig.exponent + semantics->precision
2459 - (calcSemantics.precision - excessPrecision));
2460 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2463 return normalize(rounding_mode, calcLostFraction);
2469 APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
2474 /* Scan the text. */
2475 StringRef::iterator p = str.begin();
2476 interpretDecimal(p, str.end(), &D);
2478 /* Handle the quick cases. First the case of no significant digits,
2479 i.e. zero, and then exponents that are obviously too large or too
2480 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2481 definitely overflows if
2483 (exp - 1) * L >= maxExponent
2485 and definitely underflows to zero where
2487 (exp + 1) * L <= minExponent - precision
2489 With integer arithmetic the tightest bounds for L are
2491 93/28 < L < 196/59 [ numerator <= 256 ]
2492 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2495 if (decDigitValue(*D.firstSigDigit) >= 10U) {
2499 /* Check whether the normalized exponent is high enough to overflow
2500 max during the log-rebasing in the max-exponent check below. */
2501 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2502 fs = handleOverflow(rounding_mode);
2504 /* If it wasn't, then it also wasn't high enough to overflow max
2505 during the log-rebasing in the min-exponent check. Check that it
2506 won't overflow min in either check, then perform the min-exponent
2508 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2509 (D.normalizedExponent + 1) * 28738 <=
2510 8651 * (semantics->minExponent - (int) semantics->precision)) {
2511 /* Underflow to zero and round. */
2513 fs = normalize(rounding_mode, lfLessThanHalf);
2515 /* We can finally safely perform the max-exponent check. */
2516 } else if ((D.normalizedExponent - 1) * 42039
2517 >= 12655 * semantics->maxExponent) {
2518 /* Overflow and round. */
2519 fs = handleOverflow(rounding_mode);
2521 integerPart *decSignificand;
2522 unsigned int partCount;
2524 /* A tight upper bound on number of bits required to hold an
2525 N-digit decimal integer is N * 196 / 59. Allocate enough space
2526 to hold the full significand, and an extra part required by
2528 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2529 partCount = partCountForBits(1 + 196 * partCount / 59);
2530 decSignificand = new integerPart[partCount + 1];
2533 /* Convert to binary efficiently - we do almost all multiplication
2534 in an integerPart. When this would overflow do we do a single
2535 bignum multiplication, and then revert again to multiplication
2536 in an integerPart. */
2538 integerPart decValue, val, multiplier;
2546 if (p == str.end()) {
2550 decValue = decDigitValue(*p++);
2551 assert(decValue < 10U && "Invalid character in significand");
2553 val = val * 10 + decValue;
2554 /* The maximum number that can be multiplied by ten with any
2555 digit added without overflowing an integerPart. */
2556 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2558 /* Multiply out the current part. */
2559 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2560 partCount, partCount + 1, false);
2562 /* If we used another part (likely but not guaranteed), increase
2564 if (decSignificand[partCount])
2566 } while (p <= D.lastSigDigit);
2568 category = fcNormal;
2569 fs = roundSignificandWithExponent(decSignificand, partCount,
2570 D.exponent, rounding_mode);
2572 delete [] decSignificand;
2579 APFloat::convertFromStringSpecials(StringRef str) {
2580 if (str.equals("inf") || str.equals("INFINITY")) {
2585 if (str.equals("-inf") || str.equals("-INFINITY")) {
2590 if (str.equals("nan") || str.equals("NaN")) {
2591 makeNaN(false, false);
2595 if (str.equals("-nan") || str.equals("-NaN")) {
2596 makeNaN(false, true);
2604 APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
2606 assert(!str.empty() && "Invalid string length");
2608 // Handle special cases.
2609 if (convertFromStringSpecials(str))
2612 /* Handle a leading minus sign. */
2613 StringRef::iterator p = str.begin();
2614 size_t slen = str.size();
2615 sign = *p == '-' ? 1 : 0;
2616 if (*p == '-' || *p == '+') {
2619 assert(slen && "String has no digits");
2622 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2623 assert(slen - 2 && "Invalid string");
2624 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2628 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2631 /* Write out a hexadecimal representation of the floating point value
2632 to DST, which must be of sufficient size, in the C99 form
2633 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2634 excluding the terminating NUL.
2636 If UPPERCASE, the output is in upper case, otherwise in lower case.
2638 HEXDIGITS digits appear altogether, rounding the value if
2639 necessary. If HEXDIGITS is 0, the minimal precision to display the
2640 number precisely is used instead. If nothing would appear after
2641 the decimal point it is suppressed.
2643 The decimal exponent is always printed and has at least one digit.
2644 Zero values display an exponent of zero. Infinities and NaNs
2645 appear as "infinity" or "nan" respectively.
2647 The above rules are as specified by C99. There is ambiguity about
2648 what the leading hexadecimal digit should be. This implementation
2649 uses whatever is necessary so that the exponent is displayed as
2650 stored. This implies the exponent will fall within the IEEE format
2651 range, and the leading hexadecimal digit will be 0 (for denormals),
2652 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2653 any other digits zero).
2656 APFloat::convertToHexString(char *dst, unsigned int hexDigits,
2657 bool upperCase, roundingMode rounding_mode) const
2667 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2668 dst += sizeof infinityL - 1;
2672 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2673 dst += sizeof NaNU - 1;
2678 *dst++ = upperCase ? 'X': 'x';
2680 if (hexDigits > 1) {
2682 memset (dst, '0', hexDigits - 1);
2683 dst += hexDigits - 1;
2685 *dst++ = upperCase ? 'P': 'p';
2690 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2696 return static_cast<unsigned int>(dst - p);
2699 /* Does the hard work of outputting the correctly rounded hexadecimal
2700 form of a normal floating point number with the specified number of
2701 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2702 digits necessary to print the value precisely is output. */
2704 APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2706 roundingMode rounding_mode) const
2708 unsigned int count, valueBits, shift, partsCount, outputDigits;
2709 const char *hexDigitChars;
2710 const integerPart *significand;
2715 *dst++ = upperCase ? 'X': 'x';
2718 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2720 significand = significandParts();
2721 partsCount = partCount();
2723 /* +3 because the first digit only uses the single integer bit, so
2724 we have 3 virtual zero most-significant-bits. */
2725 valueBits = semantics->precision + 3;
2726 shift = integerPartWidth - valueBits % integerPartWidth;
2728 /* The natural number of digits required ignoring trailing
2729 insignificant zeroes. */
2730 outputDigits = (valueBits - significandLSB () + 3) / 4;
2732 /* hexDigits of zero means use the required number for the
2733 precision. Otherwise, see if we are truncating. If we are,
2734 find out if we need to round away from zero. */
2736 if (hexDigits < outputDigits) {
2737 /* We are dropping non-zero bits, so need to check how to round.
2738 "bits" is the number of dropped bits. */
2740 lostFraction fraction;
2742 bits = valueBits - hexDigits * 4;
2743 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2744 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2746 outputDigits = hexDigits;
2749 /* Write the digits consecutively, and start writing in the location
2750 of the hexadecimal point. We move the most significant digit
2751 left and add the hexadecimal point later. */
2754 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2756 while (outputDigits && count) {
2759 /* Put the most significant integerPartWidth bits in "part". */
2760 if (--count == partsCount)
2761 part = 0; /* An imaginary higher zero part. */
2763 part = significand[count] << shift;
2766 part |= significand[count - 1] >> (integerPartWidth - shift);
2768 /* Convert as much of "part" to hexdigits as we can. */
2769 unsigned int curDigits = integerPartWidth / 4;
2771 if (curDigits > outputDigits)
2772 curDigits = outputDigits;
2773 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2774 outputDigits -= curDigits;
2780 /* Note that hexDigitChars has a trailing '0'. */
2783 *q = hexDigitChars[hexDigitValue (*q) + 1];
2784 } while (*q == '0');
2787 /* Add trailing zeroes. */
2788 memset (dst, '0', outputDigits);
2789 dst += outputDigits;
2792 /* Move the most significant digit to before the point, and if there
2793 is something after the decimal point add it. This must come
2794 after rounding above. */
2801 /* Finally output the exponent. */
2802 *dst++ = upperCase ? 'P': 'p';
2804 return writeSignedDecimal (dst, exponent);
2807 hash_code llvm::hash_value(const APFloat &Arg) {
2808 if (!Arg.isFiniteNonZero())
2809 return hash_combine((uint8_t)Arg.category,
2810 // NaN has no sign, fix it at zero.
2811 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2812 Arg.semantics->precision);
2814 // Normal floats need their exponent and significand hashed.
2815 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2816 Arg.semantics->precision, Arg.exponent,
2818 Arg.significandParts(),
2819 Arg.significandParts() + Arg.partCount()));
2822 // Conversion from APFloat to/from host float/double. It may eventually be
2823 // possible to eliminate these and have everybody deal with APFloats, but that
2824 // will take a while. This approach will not easily extend to long double.
2825 // Current implementation requires integerPartWidth==64, which is correct at
2826 // the moment but could be made more general.
2828 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2829 // the actual IEEE respresentations. We compensate for that here.
2832 APFloat::convertF80LongDoubleAPFloatToAPInt() const
2834 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
2835 assert(partCount()==2);
2837 uint64_t myexponent, mysignificand;
2839 if (isFiniteNonZero()) {
2840 myexponent = exponent+16383; //bias
2841 mysignificand = significandParts()[0];
2842 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2843 myexponent = 0; // denormal
2844 } else if (category==fcZero) {
2847 } else if (category==fcInfinity) {
2848 myexponent = 0x7fff;
2849 mysignificand = 0x8000000000000000ULL;
2851 assert(category == fcNaN && "Unknown category");
2852 myexponent = 0x7fff;
2853 mysignificand = significandParts()[0];
2857 words[0] = mysignificand;
2858 words[1] = ((uint64_t)(sign & 1) << 15) |
2859 (myexponent & 0x7fffLL);
2860 return APInt(80, words);
2864 APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
2866 assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
2867 assert(partCount()==2);
2873 // Convert number to double. To avoid spurious underflows, we re-
2874 // normalize against the "double" minExponent first, and only *then*
2875 // truncate the mantissa. The result of that second conversion
2876 // may be inexact, but should never underflow.
2877 // Declare fltSemantics before APFloat that uses it (and
2878 // saves pointer to it) to ensure correct destruction order.
2879 fltSemantics extendedSemantics = *semantics;
2880 extendedSemantics.minExponent = IEEEdouble.minExponent;
2881 APFloat extended(*this);
2882 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2883 assert(fs == opOK && !losesInfo);
2886 APFloat u(extended);
2887 fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2888 assert(fs == opOK || fs == opInexact);
2890 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2892 // If conversion was exact or resulted in a special case, we're done;
2893 // just set the second double to zero. Otherwise, re-convert back to
2894 // the extended format and compute the difference. This now should
2895 // convert exactly to double.
2896 if (u.isFiniteNonZero() && losesInfo) {
2897 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2898 assert(fs == opOK && !losesInfo);
2901 APFloat v(extended);
2902 v.subtract(u, rmNearestTiesToEven);
2903 fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
2904 assert(fs == opOK && !losesInfo);
2906 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2911 return APInt(128, words);
2915 APFloat::convertQuadrupleAPFloatToAPInt() const
2917 assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
2918 assert(partCount()==2);
2920 uint64_t myexponent, mysignificand, mysignificand2;
2922 if (isFiniteNonZero()) {
2923 myexponent = exponent+16383; //bias
2924 mysignificand = significandParts()[0];
2925 mysignificand2 = significandParts()[1];
2926 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2927 myexponent = 0; // denormal
2928 } else if (category==fcZero) {
2930 mysignificand = mysignificand2 = 0;
2931 } else if (category==fcInfinity) {
2932 myexponent = 0x7fff;
2933 mysignificand = mysignificand2 = 0;
2935 assert(category == fcNaN && "Unknown category!");
2936 myexponent = 0x7fff;
2937 mysignificand = significandParts()[0];
2938 mysignificand2 = significandParts()[1];
2942 words[0] = mysignificand;
2943 words[1] = ((uint64_t)(sign & 1) << 63) |
2944 ((myexponent & 0x7fff) << 48) |
2945 (mysignificand2 & 0xffffffffffffLL);
2947 return APInt(128, words);
2951 APFloat::convertDoubleAPFloatToAPInt() const
2953 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
2954 assert(partCount()==1);
2956 uint64_t myexponent, mysignificand;
2958 if (isFiniteNonZero()) {
2959 myexponent = exponent+1023; //bias
2960 mysignificand = *significandParts();
2961 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2962 myexponent = 0; // denormal
2963 } else if (category==fcZero) {
2966 } else if (category==fcInfinity) {
2970 assert(category == fcNaN && "Unknown category!");
2972 mysignificand = *significandParts();
2975 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2976 ((myexponent & 0x7ff) << 52) |
2977 (mysignificand & 0xfffffffffffffLL))));
2981 APFloat::convertFloatAPFloatToAPInt() const
2983 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
2984 assert(partCount()==1);
2986 uint32_t myexponent, mysignificand;
2988 if (isFiniteNonZero()) {
2989 myexponent = exponent+127; //bias
2990 mysignificand = (uint32_t)*significandParts();
2991 if (myexponent == 1 && !(mysignificand & 0x800000))
2992 myexponent = 0; // denormal
2993 } else if (category==fcZero) {
2996 } else if (category==fcInfinity) {
3000 assert(category == fcNaN && "Unknown category!");
3002 mysignificand = (uint32_t)*significandParts();
3005 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3006 (mysignificand & 0x7fffff)));
3010 APFloat::convertHalfAPFloatToAPInt() const
3012 assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
3013 assert(partCount()==1);
3015 uint32_t myexponent, mysignificand;
3017 if (isFiniteNonZero()) {
3018 myexponent = exponent+15; //bias
3019 mysignificand = (uint32_t)*significandParts();
3020 if (myexponent == 1 && !(mysignificand & 0x400))
3021 myexponent = 0; // denormal
3022 } else if (category==fcZero) {
3025 } else if (category==fcInfinity) {
3029 assert(category == fcNaN && "Unknown category!");
3031 mysignificand = (uint32_t)*significandParts();
3034 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3035 (mysignificand & 0x3ff)));
3038 // This function creates an APInt that is just a bit map of the floating
3039 // point constant as it would appear in memory. It is not a conversion,
3040 // and treating the result as a normal integer is unlikely to be useful.
3043 APFloat::bitcastToAPInt() const
3045 if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
3046 return convertHalfAPFloatToAPInt();
3048 if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
3049 return convertFloatAPFloatToAPInt();
3051 if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
3052 return convertDoubleAPFloatToAPInt();
3054 if (semantics == (const llvm::fltSemantics*)&IEEEquad)
3055 return convertQuadrupleAPFloatToAPInt();
3057 if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
3058 return convertPPCDoubleDoubleAPFloatToAPInt();
3060 assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
3062 return convertF80LongDoubleAPFloatToAPInt();
3066 APFloat::convertToFloat() const
3068 assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
3069 "Float semantics are not IEEEsingle");
3070 APInt api = bitcastToAPInt();
3071 return api.bitsToFloat();
3075 APFloat::convertToDouble() const
3077 assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
3078 "Float semantics are not IEEEdouble");
3079 APInt api = bitcastToAPInt();
3080 return api.bitsToDouble();
3083 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3084 /// does not support these bit patterns:
3085 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3086 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3087 /// exponent = 0, integer bit 1 ("pseudodenormal")
3088 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3089 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3091 APFloat::initFromF80LongDoubleAPInt(const APInt &api)
3093 assert(api.getBitWidth()==80);
3094 uint64_t i1 = api.getRawData()[0];
3095 uint64_t i2 = api.getRawData()[1];
3096 uint64_t myexponent = (i2 & 0x7fff);
3097 uint64_t mysignificand = i1;
3099 initialize(&APFloat::x87DoubleExtended);
3100 assert(partCount()==2);
3102 sign = static_cast<unsigned int>(i2>>15);
3103 if (myexponent==0 && mysignificand==0) {
3104 // exponent, significand meaningless
3106 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3107 // exponent, significand meaningless
3108 category = fcInfinity;
3109 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3110 // exponent meaningless
3112 significandParts()[0] = mysignificand;
3113 significandParts()[1] = 0;
3115 category = fcNormal;
3116 exponent = myexponent - 16383;
3117 significandParts()[0] = mysignificand;
3118 significandParts()[1] = 0;
3119 if (myexponent==0) // denormal
3125 APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
3127 assert(api.getBitWidth()==128);
3128 uint64_t i1 = api.getRawData()[0];
3129 uint64_t i2 = api.getRawData()[1];
3133 // Get the first double and convert to our format.
3134 initFromDoubleAPInt(APInt(64, i1));
3135 fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3136 assert(fs == opOK && !losesInfo);
3139 // Unless we have a special case, add in second double.
3140 if (isFiniteNonZero()) {
3141 APFloat v(IEEEdouble, APInt(64, i2));
3142 fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
3143 assert(fs == opOK && !losesInfo);
3146 add(v, rmNearestTiesToEven);
3151 APFloat::initFromQuadrupleAPInt(const APInt &api)
3153 assert(api.getBitWidth()==128);
3154 uint64_t i1 = api.getRawData()[0];
3155 uint64_t i2 = api.getRawData()[1];
3156 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3157 uint64_t mysignificand = i1;
3158 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3160 initialize(&APFloat::IEEEquad);
3161 assert(partCount()==2);
3163 sign = static_cast<unsigned int>(i2>>63);
3164 if (myexponent==0 &&
3165 (mysignificand==0 && mysignificand2==0)) {
3166 // exponent, significand meaningless
3168 } else if (myexponent==0x7fff &&
3169 (mysignificand==0 && mysignificand2==0)) {
3170 // exponent, significand meaningless
3171 category = fcInfinity;
3172 } else if (myexponent==0x7fff &&
3173 (mysignificand!=0 || mysignificand2 !=0)) {
3174 // exponent meaningless
3176 significandParts()[0] = mysignificand;
3177 significandParts()[1] = mysignificand2;
3179 category = fcNormal;
3180 exponent = myexponent - 16383;
3181 significandParts()[0] = mysignificand;
3182 significandParts()[1] = mysignificand2;
3183 if (myexponent==0) // denormal
3186 significandParts()[1] |= 0x1000000000000LL; // integer bit
3191 APFloat::initFromDoubleAPInt(const APInt &api)
3193 assert(api.getBitWidth()==64);
3194 uint64_t i = *api.getRawData();
3195 uint64_t myexponent = (i >> 52) & 0x7ff;
3196 uint64_t mysignificand = i & 0xfffffffffffffLL;
3198 initialize(&APFloat::IEEEdouble);
3199 assert(partCount()==1);
3201 sign = static_cast<unsigned int>(i>>63);
3202 if (myexponent==0 && mysignificand==0) {
3203 // exponent, significand meaningless
3205 } else if (myexponent==0x7ff && mysignificand==0) {
3206 // exponent, significand meaningless
3207 category = fcInfinity;
3208 } else if (myexponent==0x7ff && mysignificand!=0) {
3209 // exponent meaningless
3211 *significandParts() = mysignificand;
3213 category = fcNormal;
3214 exponent = myexponent - 1023;
3215 *significandParts() = mysignificand;
3216 if (myexponent==0) // denormal
3219 *significandParts() |= 0x10000000000000LL; // integer bit
3224 APFloat::initFromFloatAPInt(const APInt & api)
3226 assert(api.getBitWidth()==32);
3227 uint32_t i = (uint32_t)*api.getRawData();
3228 uint32_t myexponent = (i >> 23) & 0xff;
3229 uint32_t mysignificand = i & 0x7fffff;
3231 initialize(&APFloat::IEEEsingle);
3232 assert(partCount()==1);
3235 if (myexponent==0 && mysignificand==0) {
3236 // exponent, significand meaningless
3238 } else if (myexponent==0xff && mysignificand==0) {
3239 // exponent, significand meaningless
3240 category = fcInfinity;
3241 } else if (myexponent==0xff && mysignificand!=0) {
3242 // sign, exponent, significand meaningless
3244 *significandParts() = mysignificand;
3246 category = fcNormal;
3247 exponent = myexponent - 127; //bias
3248 *significandParts() = mysignificand;
3249 if (myexponent==0) // denormal
3252 *significandParts() |= 0x800000; // integer bit
3257 APFloat::initFromHalfAPInt(const APInt & api)
3259 assert(api.getBitWidth()==16);
3260 uint32_t i = (uint32_t)*api.getRawData();
3261 uint32_t myexponent = (i >> 10) & 0x1f;
3262 uint32_t mysignificand = i & 0x3ff;
3264 initialize(&APFloat::IEEEhalf);
3265 assert(partCount()==1);
3268 if (myexponent==0 && mysignificand==0) {
3269 // exponent, significand meaningless
3271 } else if (myexponent==0x1f && mysignificand==0) {
3272 // exponent, significand meaningless
3273 category = fcInfinity;
3274 } else if (myexponent==0x1f && mysignificand!=0) {
3275 // sign, exponent, significand meaningless
3277 *significandParts() = mysignificand;
3279 category = fcNormal;
3280 exponent = myexponent - 15; //bias
3281 *significandParts() = mysignificand;
3282 if (myexponent==0) // denormal
3285 *significandParts() |= 0x400; // integer bit
3289 /// Treat api as containing the bits of a floating point number. Currently
3290 /// we infer the floating point type from the size of the APInt. The
3291 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3292 /// when the size is anything else).
3294 APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
3296 if (Sem == &IEEEhalf)
3297 return initFromHalfAPInt(api);
3298 if (Sem == &IEEEsingle)
3299 return initFromFloatAPInt(api);
3300 if (Sem == &IEEEdouble)
3301 return initFromDoubleAPInt(api);
3302 if (Sem == &x87DoubleExtended)
3303 return initFromF80LongDoubleAPInt(api);
3304 if (Sem == &IEEEquad)
3305 return initFromQuadrupleAPInt(api);
3306 if (Sem == &PPCDoubleDouble)
3307 return initFromPPCDoubleDoubleAPInt(api);
3309 llvm_unreachable(0);
3313 APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
3317 return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
3319 return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
3321 return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
3323 return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
3326 return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
3327 return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
3329 llvm_unreachable("Unknown floating bit width");
3333 /// Make this number the largest magnitude normal number in the given
3335 void APFloat::makeLargest(bool Negative) {
3336 // We want (in interchange format):
3337 // sign = {Negative}
3339 // significand = 1..1
3340 category = fcNormal;
3342 exponent = semantics->maxExponent;
3344 // Use memset to set all but the highest integerPart to all ones.
3345 integerPart *significand = significandParts();
3346 unsigned PartCount = partCount();
3347 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3349 // Set the high integerPart especially setting all unused top bits for
3350 // internal consistency.
3351 const unsigned NumUnusedHighBits =
3352 PartCount*integerPartWidth - semantics->precision;
3353 significand[PartCount - 1] = ~integerPart(0) >> NumUnusedHighBits;
3356 /// Make this number the smallest magnitude denormal number in the given
3358 void APFloat::makeSmallest(bool Negative) {
3359 // We want (in interchange format):
3360 // sign = {Negative}
3362 // significand = 0..01
3363 category = fcNormal;
3365 exponent = semantics->minExponent;
3366 APInt::tcSet(significandParts(), 1, partCount());
3370 APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
3371 // We want (in interchange format):
3372 // sign = {Negative}
3374 // significand = 1..1
3375 APFloat Val(Sem, uninitialized);
3376 Val.makeLargest(Negative);
3380 APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
3381 // We want (in interchange format):
3382 // sign = {Negative}
3384 // significand = 0..01
3385 APFloat Val(Sem, uninitialized);
3386 Val.makeSmallest(Negative);
3390 APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
3391 APFloat Val(Sem, fcNormal, Negative);
3393 // We want (in interchange format):
3394 // sign = {Negative}
3396 // significand = 10..0
3398 Val.exponent = Sem.minExponent;
3399 Val.zeroSignificand();
3400 Val.significandParts()[partCountForBits(Sem.precision)-1] |=
3401 (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
3406 APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
3407 initFromAPInt(&Sem, API);
3410 APFloat::APFloat(float f) {
3411 initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
3414 APFloat::APFloat(double d) {
3415 initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
3419 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3420 Buffer.append(Str.begin(), Str.end());
3423 /// Removes data from the given significand until it is no more
3424 /// precise than is required for the desired precision.
3425 void AdjustToPrecision(APInt &significand,
3426 int &exp, unsigned FormatPrecision) {
3427 unsigned bits = significand.getActiveBits();
3429 // 196/59 is a very slight overestimate of lg_2(10).
3430 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3432 if (bits <= bitsRequired) return;
3434 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3435 if (!tensRemovable) return;
3437 exp += tensRemovable;
3439 APInt divisor(significand.getBitWidth(), 1);
3440 APInt powten(significand.getBitWidth(), 10);
3442 if (tensRemovable & 1)
3444 tensRemovable >>= 1;
3445 if (!tensRemovable) break;
3449 significand = significand.udiv(divisor);
3451 // Truncate the significand down to its active bit count.
3452 significand = significand.trunc(significand.getActiveBits());
3456 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3457 int &exp, unsigned FormatPrecision) {
3458 unsigned N = buffer.size();
3459 if (N <= FormatPrecision) return;
3461 // The most significant figures are the last ones in the buffer.
3462 unsigned FirstSignificant = N - FormatPrecision;
3465 // FIXME: this probably shouldn't use 'round half up'.
3467 // Rounding down is just a truncation, except we also want to drop
3468 // trailing zeros from the new result.
3469 if (buffer[FirstSignificant - 1] < '5') {
3470 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3473 exp += FirstSignificant;
3474 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3478 // Rounding up requires a decimal add-with-carry. If we continue
3479 // the carry, the newly-introduced zeros will just be truncated.
3480 for (unsigned I = FirstSignificant; I != N; ++I) {
3481 if (buffer[I] == '9') {
3489 // If we carried through, we have exactly one digit of precision.
3490 if (FirstSignificant == N) {
3491 exp += FirstSignificant;
3493 buffer.push_back('1');
3497 exp += FirstSignificant;
3498 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3502 void APFloat::toString(SmallVectorImpl<char> &Str,
3503 unsigned FormatPrecision,
3504 unsigned FormatMaxPadding) const {
3508 return append(Str, "-Inf");
3510 return append(Str, "+Inf");
3512 case fcNaN: return append(Str, "NaN");
3518 if (!FormatMaxPadding)
3519 append(Str, "0.0E+0");
3531 // Decompose the number into an APInt and an exponent.
3532 int exp = exponent - ((int) semantics->precision - 1);
3533 APInt significand(semantics->precision,
3534 makeArrayRef(significandParts(),
3535 partCountForBits(semantics->precision)));
3537 // Set FormatPrecision if zero. We want to do this before we
3538 // truncate trailing zeros, as those are part of the precision.
3539 if (!FormatPrecision) {
3540 // It's an interesting question whether to use the nominal
3541 // precision or the active precision here for denormals.
3543 // FormatPrecision = ceil(significandBits / lg_2(10))
3544 FormatPrecision = (semantics->precision * 59 + 195) / 196;
3547 // Ignore trailing binary zeros.
3548 int trailingZeros = significand.countTrailingZeros();
3549 exp += trailingZeros;
3550 significand = significand.lshr(trailingZeros);
3552 // Change the exponent from 2^e to 10^e.
3555 } else if (exp > 0) {
3557 significand = significand.zext(semantics->precision + exp);
3558 significand <<= exp;
3560 } else { /* exp < 0 */
3563 // We transform this using the identity:
3564 // (N)(2^-e) == (N)(5^e)(10^-e)
3565 // This means we have to multiply N (the significand) by 5^e.
3566 // To avoid overflow, we have to operate on numbers large
3567 // enough to store N * 5^e:
3568 // log2(N * 5^e) == log2(N) + e * log2(5)
3569 // <= semantics->precision + e * 137 / 59
3570 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3572 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3574 // Multiply significand by 5^e.
3575 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3576 significand = significand.zext(precision);
3577 APInt five_to_the_i(precision, 5);
3579 if (texp & 1) significand *= five_to_the_i;
3583 five_to_the_i *= five_to_the_i;
3587 AdjustToPrecision(significand, exp, FormatPrecision);
3589 SmallVector<char, 256> buffer;
3592 unsigned precision = significand.getBitWidth();
3593 APInt ten(precision, 10);
3594 APInt digit(precision, 0);
3596 bool inTrail = true;
3597 while (significand != 0) {
3598 // digit <- significand % 10
3599 // significand <- significand / 10
3600 APInt::udivrem(significand, ten, significand, digit);
3602 unsigned d = digit.getZExtValue();
3604 // Drop trailing zeros.
3605 if (inTrail && !d) exp++;
3607 buffer.push_back((char) ('0' + d));
3612 assert(!buffer.empty() && "no characters in buffer!");
3614 // Drop down to FormatPrecision.
3615 // TODO: don't do more precise calculations above than are required.
3616 AdjustToPrecision(buffer, exp, FormatPrecision);
3618 unsigned NDigits = buffer.size();
3620 // Check whether we should use scientific notation.
3621 bool FormatScientific;
3622 if (!FormatMaxPadding)
3623 FormatScientific = true;
3628 // But we shouldn't make the number look more precise than it is.
3629 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3630 NDigits + (unsigned) exp > FormatPrecision);
3632 // Power of the most significant digit.
3633 int MSD = exp + (int) (NDigits - 1);
3636 FormatScientific = false;
3638 // 765e-5 == 0.00765
3640 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3645 // Scientific formatting is pretty straightforward.
3646 if (FormatScientific) {
3647 exp += (NDigits - 1);
3649 Str.push_back(buffer[NDigits-1]);
3654 for (unsigned I = 1; I != NDigits; ++I)
3655 Str.push_back(buffer[NDigits-1-I]);
3658 Str.push_back(exp >= 0 ? '+' : '-');
3659 if (exp < 0) exp = -exp;
3660 SmallVector<char, 6> expbuf;
3662 expbuf.push_back((char) ('0' + (exp % 10)));
3665 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3666 Str.push_back(expbuf[E-1-I]);
3670 // Non-scientific, positive exponents.
3672 for (unsigned I = 0; I != NDigits; ++I)
3673 Str.push_back(buffer[NDigits-1-I]);
3674 for (unsigned I = 0; I != (unsigned) exp; ++I)
3679 // Non-scientific, negative exponents.
3681 // The number of digits to the left of the decimal point.
3682 int NWholeDigits = exp + (int) NDigits;
3685 if (NWholeDigits > 0) {
3686 for (; I != (unsigned) NWholeDigits; ++I)
3687 Str.push_back(buffer[NDigits-I-1]);
3690 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3694 for (unsigned Z = 1; Z != NZeros; ++Z)
3698 for (; I != NDigits; ++I)
3699 Str.push_back(buffer[NDigits-I-1]);
3702 bool APFloat::getExactInverse(APFloat *inv) const {
3703 // Special floats and denormals have no exact inverse.
3704 if (!isFiniteNonZero())
3707 // Check that the number is a power of two by making sure that only the
3708 // integer bit is set in the significand.
3709 if (significandLSB() != semantics->precision - 1)
3713 APFloat reciprocal(*semantics, 1ULL);
3714 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3717 // Avoid multiplication with a denormal, it is not safe on all platforms and
3718 // may be slower than a normal division.
3719 if (reciprocal.isDenormal())
3722 assert(reciprocal.isFiniteNonZero() &&
3723 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3731 bool APFloat::isSignaling() const {
3735 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3736 // first bit of the trailing significand being 0.
3737 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3740 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3742 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3743 /// appropriate sign switching before/after the computation.
3744 APFloat::opStatus APFloat::next(bool nextDown) {
3745 // If we are performing nextDown, swap sign so we have -x.
3749 // Compute nextUp(x)
3750 opStatus result = opOK;
3752 // Handle each float category separately.
3755 // nextUp(+inf) = +inf
3758 // nextUp(-inf) = -getLargest()
3762 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3763 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3764 // change the payload.
3765 if (isSignaling()) {
3766 result = opInvalidOp;
3767 // For consistency, propogate the sign of the sNaN to the qNaN.
3768 makeNaN(false, isNegative(), 0);
3772 // nextUp(pm 0) = +getSmallest()
3773 makeSmallest(false);
3776 // nextUp(-getSmallest()) = -0
3777 if (isSmallest() && isNegative()) {
3778 APInt::tcSet(significandParts(), 0, partCount());
3784 // nextUp(getLargest()) == INFINITY
3785 if (isLargest() && !isNegative()) {
3786 APInt::tcSet(significandParts(), 0, partCount());
3787 category = fcInfinity;
3788 exponent = semantics->maxExponent + 1;
3792 // nextUp(normal) == normal + inc.
3794 // If we are negative, we need to decrement the significand.
3796 // We only cross a binade boundary that requires adjusting the exponent
3798 // 1. exponent != semantics->minExponent. This implies we are not in the
3799 // smallest binade or are dealing with denormals.
3800 // 2. Our significand excluding the integral bit is all zeros.
3801 bool WillCrossBinadeBoundary =
3802 exponent != semantics->minExponent && isSignificandAllZeros();
3804 // Decrement the significand.
3806 // We always do this since:
3807 // 1. If we are dealing with a non binade decrement, by definition we
3808 // just decrement the significand.
3809 // 2. If we are dealing with a normal -> normal binade decrement, since
3810 // we have an explicit integral bit the fact that all bits but the
3811 // integral bit are zero implies that subtracting one will yield a
3812 // significand with 0 integral bit and 1 in all other spots. Thus we
3813 // must just adjust the exponent and set the integral bit to 1.
3814 // 3. If we are dealing with a normal -> denormal binade decrement,
3815 // since we set the integral bit to 0 when we represent denormals, we
3816 // just decrement the significand.
3817 integerPart *Parts = significandParts();
3818 APInt::tcDecrement(Parts, partCount());
3820 if (WillCrossBinadeBoundary) {
3821 // Our result is a normal number. Do the following:
3822 // 1. Set the integral bit to 1.
3823 // 2. Decrement the exponent.
3824 APInt::tcSetBit(Parts, semantics->precision - 1);
3828 // If we are positive, we need to increment the significand.
3830 // We only cross a binade boundary that requires adjusting the exponent if
3831 // the input is not a denormal and all of said input's significand bits
3832 // are set. If all of said conditions are true: clear the significand, set
3833 // the integral bit to 1, and increment the exponent. If we have a
3834 // denormal always increment since moving denormals and the numbers in the
3835 // smallest normal binade have the same exponent in our representation.
3836 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3838 if (WillCrossBinadeBoundary) {
3839 integerPart *Parts = significandParts();
3840 APInt::tcSet(Parts, 0, partCount());
3841 APInt::tcSetBit(Parts, semantics->precision - 1);
3842 assert(exponent != semantics->maxExponent &&
3843 "We can not increment an exponent beyond the maxExponent allowed"
3844 " by the given floating point semantics.");
3847 incrementSignificand();
3853 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3861 APFloat::makeInf(bool Negative) {
3862 category = fcInfinity;
3864 exponent = semantics->maxExponent + 1;
3865 APInt::tcSet(significandParts(), 0, partCount());
3869 APFloat::makeZero(bool Negative) {
3872 exponent = semantics->minExponent-1;
3873 APInt::tcSet(significandParts(), 0, partCount());